upstream/mercurial-mirror Commit - r44417:8a81fa44

1

# match.py - filename matching

1

# match.py - filename matching

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

from __future__ import absolute_import, print_function

8

from __future__ import absolute_import, print_function

9

10

import copy

10

import copy

11

import itertools

11

import itertools

12

import os

12

import os

13

import re

13

import re

14

15

from .i18n import _

15

from .i18n import _

16

from .pycompat import open

16

from .pycompat import open

17

from . import (

17

from . import (

18

encoding,

18

encoding,

19

error,

19

error,

20

pathutil,

20

pathutil,

21

policy,

21

policy,

22

pycompat,

22

pycompat,

23

util,

23

util,

24

)

24

)

25

from .utils import stringutil

25

from .utils import stringutil

26

27

# Rust-backed 'filepatterns' module, resolved through the policy module.
rustmod = policy.importrust('filepatterns')

# Pattern kind prefixes (the "kind" in b'kind:pattern') known to this module.
allpatternkinds = (
    b're',
    b'glob',
    b'path',
    b'relglob',
    b'relpath',
    b'relre',
    b'rootglob',
    b'listfile',
    b'listfile0',
    b'set',
    b'include',
    b'subinclude',
    b'rootfilesin',
)
# Pattern kinds that _donormalize() resolves relative to cwd (through
# pathutil.canonpath) rather than relative to the repository root.
cwdrelativepatternkinds = (b'relpath', b'glob')

# Short alias for the property-caching decorator used by the matcher classes.
propertycache = util.propertycache

47

48

49

def _rematcher(regex):
    '''compile the regexp with the best available regexp engine and return a
    matcher function

    The compiled object's test_match attribute is returned when it exists;
    otherwise the regular match method is returned.
    '''
    m = util.re.compile(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        return m.test_match
    except AttributeError:
        # the compiled object has no test_match; use plain match()
        return m.match

58

59

60

def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):

60

def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):

61

'''Returns the kindpats list with the 'set' patterns expanded to matchers'''

61

'''Returns the kindpats list with the 'set' patterns expanded to matchers'''

62

matchers = []

62

matchers = []

63

other = []

63

other = []

64

65

for kind, pat, source in kindpats:

65

for kind, pat, source in kindpats:

66

if kind == b'set':

66

if kind == b'set':

67

if ctx is None:

67

if ctx is None:

68

raise error.ProgrammingError(

68

raise error.ProgrammingError(

69

b"fileset expression with no context"

69

b"fileset expression with no context"

70

)

70

)

71

matchers.append(ctx.matchfileset(pat, badfn=badfn))

71

matchers.append(ctx.matchfileset(pat, badfn=badfn))

72

73

if listsubrepos:

73

if listsubrepos:

74

for subpath in ctx.substate:

74

for subpath in ctx.substate:

75

sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)

75

sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)

76

pm = prefixdirmatcher(subpath, sm, badfn=badfn)

76

pm = prefixdirmatcher(subpath, sm, badfn=badfn)

77

matchers.append(pm)

77

matchers.append(pm)

78

79

continue

79

continue

80

other.append((kind, pat, source))

80

other.append((kind, pat, source))

81

return matchers, other

81

return matchers, other

82

83

84

def _expandsubinclude(kindpats, root):

84

def _expandsubinclude(kindpats, root):

85

'''Returns the list of subinclude matcher args and the kindpats without the

85

'''Returns the list of subinclude matcher args and the kindpats without the

86

subincludes in it.'''

86

subincludes in it.'''

87

relmatchers = []

87

relmatchers = []

88

other = []

88

other = []

89

90

for kind, pat, source in kindpats:

90

for kind, pat, source in kindpats:

91

if kind == b'subinclude':

91

if kind == b'subinclude':

92

sourceroot = pathutil.dirname(util.normpath(source))

92

sourceroot = pathutil.dirname(util.normpath(source))

93

pat = util.pconvert(pat)

93

pat = util.pconvert(pat)

94

path = pathutil.join(sourceroot, pat)

94

path = pathutil.join(sourceroot, pat)

95

96

newroot = pathutil.dirname(path)

96

newroot = pathutil.dirname(path)

97

matcherargs = (newroot, b'', [], [b'include:%s' % path])

97

matcherargs = (newroot, b'', [], [b'include:%s' % path])

98

99

prefix = pathutil.canonpath(root, root, newroot)

99

prefix = pathutil.canonpath(root, root, newroot)

100

if prefix:

100

if prefix:

101

prefix += b'/'

101

prefix += b'/'

102

relmatchers.append((prefix, matcherargs))

102

relmatchers.append((prefix, matcherargs))

103

else:

103

else:

104

other.append((kind, pat, source))

104

other.append((kind, pat, source))

105

106

return relmatchers, other

106

return relmatchers, other

107

108

109

def _kindpatsalwaysmatch(kindpats):

109

def _kindpatsalwaysmatch(kindpats):

110

""""Checks whether the kindspats match everything, as e.g.

110

""""Checks whether the kindspats match everything, as e.g.

111

'relpath:.' does.

111

'relpath:.' does.

112

"""

112

"""

113

for kind, pat, source in kindpats:

113

for kind, pat, source in kindpats:

114

if pat != b'' or kind not in [b'relpath', b'glob']:

114

if pat != b'' or kind not in [b'relpath', b'glob']:

115

return False

115

return False

116

return True

116

return True

117

118

119

def _buildkindpatsmatcher(
    matchercls, root, kindpats, ctx=None, listsubrepos=False, badfn=None
):
    '''Build a single matcher for kindpats using matchercls

    b'set' patterns are first expanded to fileset matchers by _expandsets();
    the remaining patterns, if any, are handed to
    matchercls(root, kindpats, badfn=badfn). The result is a nevermatcher
    when nothing was produced, the matcher itself when there is exactly one,
    and a unionmatcher over all of them otherwise.
    '''
    matchers = []
    fms, kindpats = _expandsets(
        kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn
    )
    if kindpats:
        m = matchercls(root, kindpats, badfn=badfn)
        matchers.append(m)
    if fms:
        matchers.extend(fms)
    if not matchers:
        return nevermatcher(badfn=badfn)
    if len(matchers) == 1:
        return matchers[0]
    return unionmatcher(matchers)

136

137

138

def match(
    root,
    cwd,
    patterns=None,
    include=None,
    exclude=None,
    default=b'glob',
    auditor=None,
    ctx=None,
    listsubrepos=False,
    warn=None,
    badfn=None,
    icasefs=False,
):
    r"""build an object to match a set of file patterns

    arguments:
    root - the canonical root of the tree you're matching against
    cwd - the current working directory, if relevant
    patterns - patterns to find
    include - patterns to include (unless they are excluded)
    exclude - patterns to exclude (even if they are included)
    default - if a pattern in patterns has no explicit type, assume this one
    auditor - optional path auditor
    ctx - optional changecontext
    listsubrepos - if True, recurse into subrepositories
    warn - optional function used for printing warnings
    badfn - optional bad() callback for this matcher instead of the default
    icasefs - make a matcher for wdir on case insensitive filesystems, which
        normalizes the given patterns to the case in the filesystem

    a pattern is one of:
    'glob:<glob>' - a glob relative to cwd
    're:<regexp>' - a regular expression
    'path:<path>' - a path relative to repository root, which is matched
                    recursively
    'rootfilesin:<path>' - a path relative to repository root, which is
                    matched non-recursively (will not match subdirectories)
    'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
    'relpath:<path>' - a path relative to cwd
    'relre:<regexp>' - a regexp that needn't match the start of a name
    'set:<fileset>' - a fileset expression
    'include:<path>' - a file of patterns to read and include
    'subinclude:<path>' - a file of patterns to match against files under
                          the same directory
    '<something>' - a pattern of the specified default type

    >>> def _match(root, *args, **kwargs):
    ...     return match(util.localpath(root), *args, **kwargs)

    Usually a patternmatcher is returned:
    >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
    <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

    Combining 'patterns' with 'include' (resp. 'exclude') gives an
    intersectionmatcher (resp. a differencematcher):
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
    <class 'mercurial.match.intersectionmatcher'>
    >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
    <class 'mercurial.match.differencematcher'>

    Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
    >>> _match(b'/foo', b'.', [])
    <alwaysmatcher>

    The 'default' argument determines which kind of pattern is assumed if a
    pattern has no prefix:
    >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
    <patternmatcher patterns='.*\\.c$'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
    <patternmatcher patterns='main\\.py(?:/|$)'>
    >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
    <patternmatcher patterns='main.py'>

    The primary use of matchers is to check whether a value (usually a file
    name) matches against one of the patterns given at initialization. There
    are two ways of doing this check.

    >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])

    1. Calling the matcher with a file name returns True if any pattern
    matches that file name:
    >>> m(b'a')
    True
    >>> m(b'main.c')
    True
    >>> m(b'test.py')
    False

    2. Using the exact() method only returns True if the file name matches one
    of the exact patterns (i.e. not re: or glob: patterns):
    >>> m.exact(b'a')
    True
    >>> m.exact(b'main.c')
    False
    """
    assert os.path.isabs(root)
    cwd = os.path.join(root, util.localpath(cwd))
    normalize = _donormalize
    if icasefs:
        # NOTE(review): this branch dereferences ctx, so callers passing
        # icasefs=True are expected to pass a ctx as well
        dirstate = ctx.repo().dirstate
        dsnormalize = dirstate.normalize

        def normalize(patterns, default, root, cwd, auditor, warn):
            kp = _donormalize(patterns, default, root, cwd, auditor, warn)
            kindpats = []
            for kind, pats, source in kp:
                if kind not in (b're', b'relre'):  # regex can't be normalized
                    p = pats
                    pats = dsnormalize(pats)

                    # Preserve the original to handle a case only rename.
                    if p != pats and p in dirstate:
                        kindpats.append((kind, p, source))

                kindpats.append((kind, pats, source))
            return kindpats

    if patterns:
        kindpats = normalize(patterns, default, root, cwd, auditor, warn)
        if _kindpatsalwaysmatch(kindpats):
            m = alwaysmatcher(badfn)
        else:
            m = _buildkindpatsmatcher(
                patternmatcher,
                root,
                kindpats,
                ctx=ctx,
                listsubrepos=listsubrepos,
                badfn=badfn,
            )
    else:
        # It's a little strange that no patterns means to match everything.
        # Consider changing this to match nothing (probably using nevermatcher).
        m = alwaysmatcher(badfn)

    if include:
        kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
        im = _buildkindpatsmatcher(
            includematcher,
            root,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = intersectmatchers(m, im)
    if exclude:
        kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
        em = _buildkindpatsmatcher(
            includematcher,
            root,
            kindpats,
            ctx=ctx,
            listsubrepos=listsubrepos,
            badfn=None,
        )
        m = differencematcher(m, em)
    return m

297

298

299

def exact(files, badfn=None):
    '''Return an exactmatcher built from files, with the optional bad()
    callback badfn.'''
    return exactmatcher(files, badfn=badfn)

301

302

303

def always(badfn=None):
    '''Return an alwaysmatcher, with the optional bad() callback badfn.'''
    return alwaysmatcher(badfn)

305

306

307

def never(badfn=None):
    '''Return a nevermatcher, with the optional bad() callback badfn.'''
    return nevermatcher(badfn)

309

310

311

def badmatch(match, badfn):
    """Make a copy of the given matcher, replacing its bad method with the given
    one.

    The copy is shallow (copy.copy), and the matcher passed in is left
    untouched.
    """
    m = copy.copy(match)
    m.bad = badfn
    return m

318

319

320

def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
    '''Convert 'kind:pat' from the patterns list to tuples with kind and
    normalized and rooted patterns and with listfiles expanded.

    Returns a list of (kind, pat, source) tuples. 'source' is b'' for
    patterns given directly, and the name of the list file or include file
    for patterns read from one.

    Raises error.Abort if a list file cannot be read, or if processing an
    include file aborts. An include file that fails with IOError is skipped,
    reporting through warn when one is given.
    '''
    kindpats = []
    for kind, pat in [_patsplit(p, default) for p in patterns]:
        if kind in cwdrelativepatternkinds:
            pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
        elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
            pat = util.normpath(pat)
        elif kind in (b'listfile', b'listfile0'):
            try:
                files = util.readfile(pat)
                if kind == b'listfile0':
                    files = files.split(b'\0')
                else:
                    files = files.splitlines()
                files = [f for f in files if f]
            except EnvironmentError:
                raise error.Abort(_(b"unable to read file list (%s)") % pat)
            # the entries of the list file are patterns themselves
            for k, p, source in _donormalize(
                files, default, root, cwd, auditor, warn
            ):
                kindpats.append((k, p, pat))
            continue
        elif kind == b'include':
            try:
                fullpath = os.path.join(root, util.localpath(pat))
                includepats = readpatternfile(fullpath, warn)
                for k, p, source in _donormalize(
                    includepats, default, root, cwd, auditor, warn
                ):
                    kindpats.append((k, p, source or pat))
            except error.Abort as inst:
                # exceptions are not subscriptable on Python 3, so take the
                # message from args rather than indexing the exception
                raise error.Abort(b'%s: %s' % (pat, inst.args[0]))
            except IOError as inst:
                if warn:
                    warn(
                        _(b"skipping unreadable pattern file '%s': %s\n")
                        % (pat, stringutil.forcebytestr(inst.strerror))
                    )
            continue
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat, b''))
    return kindpats

367

368

369

class basematcher(object):
    '''Base class of the matchers in this module.

    On its own it matches no file (matchfn() returns False), lists no files,
    and reports every directory as worth visiting.
    '''

    def __init__(self, badfn=None):
        # only override the bad() method below when a callback was supplied
        if badfn is not None:
            self.bad = badfn

    def __call__(self, fn):
        return self.matchfn(fn)

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    @propertycache
    def _files(self):
        return []

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    @propertycache
    def _fileset(self):
        return set(self._files)

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fileset

    def matchfn(self, f):
        return False

    def visitdir(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories. This is
        based on the match's primary, included, and excluded patterns.

        Returns the string 'all' if the given directory and all subdirectories
        should be visited. Otherwise returns True or False indicating whether
        the given directory should be visited.
        '''
        return True

    def visitchildrenset(self, dir):
        '''Decides whether a directory should be visited based on whether it
        has potential matches in it or one of its subdirectories, and
        potentially lists which subdirectories of that directory should be
        visited. This is based on the match's primary, included, and excluded
        patterns.

        This function is very similar to 'visitdir', and the following mapping
        can be applied:

         visitdir | visitchildrenset
        ----------+-------------------
         False    | set()
         'all'    | 'all'
         True     | 'this' OR non-empty set of subdirs -or files- to visit

        Example:
          Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would return
          the following values (assuming the implementation of visitchildrenset
          is capable of recognizing this; some implementations are not).

          '' -> {'foo', 'qux'}
          'baz' -> set()
          'foo' -> {'bar'}
          # Ideally this would be 'all', but since the prefix nature of matchers
          # is applied to the entire matcher, we have to downgrade this to
          # 'this' due to the non-prefix 'rootfilesin'-kind matcher being mixed
          # in.
          'foo/bar' -> 'this'
          'qux' -> 'this'

        Important:
          Most matchers do not know if they're representing files or
          directories. They see ['path:dir/f'] and don't know whether 'f' is a
          file or a directory, so visitchildrenset('dir') for most matchers will
          return {'f'}, but if the matcher knows it's a file (like exactmatcher
          does), it may return 'this'. Do not rely on the return being a set
          indicating that there are no files in this dir to investigate (or
          equivalently that if there are files to investigate in 'dir' that it
          will always return 'this').
        '''
        return b'this'

    def always(self):
        '''Matcher will match everything and .files() will be empty --
        optimization might be possible.'''
        return False

    def isexact(self):
        '''Matcher will match exactly the list of files in .files() --
        optimization might be possible.'''
        return False

    def prefix(self):
        '''Matcher will match the paths in .files() recursively --
        optimization might be possible.'''
        return False

    def anypats(self):
        '''None of .always(), .isexact(), and .prefix() is true --
        optimizations will be difficult.'''
        return not self.always() and not self.isexact() and not self.prefix()

483

484

485

class alwaysmatcher(basematcher):
    '''Matches everything.'''

    def __init__(self, badfn=None):
        super(alwaysmatcher, self).__init__(badfn)

    def always(self):
        return True

    def matchfn(self, f):
        return True

    def visitdir(self, dir):
        # every directory, and everything below it, is of interest
        return b'all'

    def visitchildrenset(self, dir):
        return b'all'

    def __repr__(self):
        return r'<alwaysmatcher>'

505

506

507

class nevermatcher(basematcher):
    '''Matches nothing.'''

    def __init__(self, badfn=None):
        super(nevermatcher, self).__init__(badfn)

    # It's a little weird to say that the nevermatcher is an exact matcher
    # or a prefix matcher, but it seems to make sense to let callers take
    # fast paths based on either. There will be no exact matches, nor any
    # prefixes (files() returns []), so fast paths iterating over them should
    # be efficient (and correct).
    def isexact(self):
        return True

    def prefix(self):
        return True

    def visitdir(self, dir):
        # nothing can match, so no directory needs to be visited
        return False

    def visitchildrenset(self, dir):
        return set()

    def __repr__(self):
        return r'<nevermatcher>'

532

533

534

class predicatematcher(basematcher):
    """Wrap a plain boolean function so it can be used as a matcher.

    ``predfn`` is installed as ``matchfn``; ``predrepr``, when given, is
    what ``__repr__`` uses to describe the predicate.
    """

    def __init__(self, predfn, predrepr=None, badfn=None):
        super(predicatematcher, self).__init__(badfn)
        self._predrepr = predrepr
        self.matchfn = predfn

    @encoding.strmethod
    def __repr__(self):
        # Prefer the caller-supplied description; fall back to the repr of
        # the predicate itself when buildrepr() yields nothing truthy.
        described = stringutil.buildrepr(self._predrepr)
        if not described:
            described = pycompat.byterepr(self.matchfn)
        # NOTE(review): the 'predicatenmatcher' spelling is reproduced
        # byte-for-byte; recorded output elsewhere may depend on it.
        return b'<predicatenmatcher pred=%s>' % described

548

549

550

class patternmatcher(basematcher):
    r"""Matches a set of (kind, pat, source) against a 'root' directory.

    >>> kindpats = [
    ...     (b're', br'.*\.c$', b''),
    ...     (b'path', b'foo/a', b''),
    ...     (b'relpath', b'b', b''),
    ...     (b'glob', b'*.h', b''),
    ... ]
    >>> m = patternmatcher(b'foo', kindpats)
    >>> m(b'main.c')  # matches re:.*\.c$
    True
    >>> m(b'b.txt')
    False
    >>> m(b'foo/a')  # matches path:foo/a
    True
    >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
    False
    >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
    True
    >>> m(b'lib.h')  # matches glob:*.h
    True

    >>> m.files()
    ['', 'foo/a', 'b', '']
    >>> m.exact(b'foo/a')
    True
    >>> m.exact(b'b')
    True
    >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
    False
    """

    def __init__(self, root, kindpats, badfn=None):
        super(patternmatcher, self).__init__(badfn)

        # Everything is derived from kindpats by module-level helpers.
        self._files = _explicitfiles(kindpats)
        self._prefix = _prefix(kindpats)
        # b'$' is the suffix handed to _buildmatch for this matcher kind.
        patterns, matchfn = _buildmatch(kindpats, b'$', root)
        self._pats = patterns
        self.matchfn = matchfn

    @propertycache
    def _dirs(self):
        # Computed once, on first use, from the explicit file set.
        alldirs = pathutil.dirs(self._fileset)
        return set(alldirs)

    def visitdir(self, dir):
        '''Decide whether ``dir`` has to be visited.

        Returns b'all' when ``dir`` is one of the explicit files and this
        is a prefix matcher; otherwise True when ``dir`` is an explicit
        file, one of the directories derived from the explicit files, or
        has an ancestor among the explicit files; otherwise False.
        '''
        explicit = self._fileset
        if dir in explicit:
            if self._prefix:
                return b'all'
            return True
        if dir in self._dirs:
            return True
        for parentdir in pathutil.finddirs(dir):
            if parentdir in explicit:
                return True
        return False

    def visitchildrenset(self, dir):
        '''Translate the visitdir() verdict into visitchildrenset() terms.

        A falsy verdict becomes set(), True becomes b'this', and b'all'
        is passed through.
        '''
        verdict = self.visitdir(dir)
        if not verdict:
            return set()
        if verdict is True:
            return b'this'
        assert verdict == b'all'
        return b'all'

    def prefix(self):
        '''Return the prefix flag computed from kindpats at construction.'''
        return self._prefix

    @encoding.strmethod
    def __repr__(self):
        return b'<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)

621

622

623

# This is basically a reimplementation of pathutil.dirs that stores the

623

# This is basically a reimplementation of pathutil.dirs that stores the

624

# children instead of just a count of them, plus a small optional optimization

624

# children instead of just a count of them, plus a small optional optimization

625

# to avoid some directories we don't need.

625

# to avoid some directories we don't need.

626

class _dirchildren(object):

626

class _dirchildren(object):

627

def __init__(self, paths, onlyinclude=None):

627

def __init__(self, paths, onlyinclude=None):

628

self._dirs = {}

628

self._dirs = {}

629

self._onlyinclude = onlyinclude or []

629

self._onlyinclude = onlyinclude or []

630

addpath = self.addpath

630

addpath = self.addpath

631

for f in paths:

631

for f in paths:

632

addpath(f)

632

addpath(f)

633

634

def addpath(self, path):

634

def addpath(self, path):

635

if path == b'':

635

if path == b'':

636

return

636

return

637

dirs = self._dirs

637

dirs = self._dirs

638

findsplitdirs = _dirchildren._findsplitdirs

638

findsplitdirs = _dirchildren._findsplitdirs

639

for d, b in findsplitdirs(path):

639

for d, b in findsplitdirs(path):

640

if d not in self._onlyinclude:

640

if d not in self._onlyinclude:

641

continue

641

continue

642

dirs.setdefault(d, set()).add(b)

642

dirs.setdefault(d, set()).add(b)

643

644

@staticmethod

644

@staticmethod

645

def _findsplitdirs(path):

645

def _findsplitdirs(path):

646

# yields (dirname, basename) tuples, walking back to the root. This is

646

# yields (dirname, basename) tuples, walking back to the root. This is

647

# very similar to pathutil.finddirs, except:

647

# very similar to pathutil.finddirs, except:

648

# - produces a (dirname, basename) tuple, not just 'dirname'

648

# - produces a (dirname, basename) tuple, not just 'dirname'

649

# Unlike manifest._splittopdir, this does not suffix `dirname` with a

649

# Unlike manifest._splittopdir, this does not suffix `dirname` with a

650

# slash.

650

# slash.

651

oldpos = len(path)

651

oldpos = len(path)

652

pos = path.rfind(b'/')

652

pos = path.rfind(b'/')

653

while pos != -1:

653

while pos != -1:

654

yield path[:pos], path[pos + 1 : oldpos]

654

yield path[:pos], path[pos + 1 : oldpos]

655

oldpos = pos

655

oldpos = pos

656

pos = path.rfind(b'/', 0, pos)

656

pos = path.rfind(b'/', 0, pos)

657

yield b'', path[:oldpos]

657

yield b'', path[:oldpos]

658

659

def get(self, path):

659

def get(self, path):

660

return self._dirs.get(path, set())

660

return self._dirs.get(path, set())

661

662

663

class includematcher(basematcher):
    '''Matcher built from include patterns.

    Besides the match function, it keeps three directory collections taken
    from _rootsdirsandparents(kindpats): ``_roots`` (recursively included),
    ``_dirs`` (non-recursively included) and ``_parents`` (needed only to
    reach something in the other two).
    '''

    def __init__(self, root, kindpats, badfn=None):
        super(includematcher, self).__init__(badfn)

        # b'(?:/|$)' is the suffix handed to _buildmatch for this matcher.
        patterns, matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
        self._pats = patterns
        self.matchfn = matchfn
        self._prefix = _prefix(kindpats)
        roots, dirs, parents = _rootsdirsandparents(kindpats)
        # roots are directories which are recursively included.
        self._roots = set(roots)
        # dirs are directories which are non-recursively included.
        self._dirs = set(dirs)
        # parents are directories which are non-recursively included because
        # they are needed to get to items in _dirs or _roots.
        self._parents = parents

    def _underroot(self, dir):
        # True when some ancestor yielded by pathutil.finddirs(dir) is a root.
        for parentdir in pathutil.finddirs(dir):
            if parentdir in self._roots:
                return True
        return False

    def visitdir(self, dir):
        '''Decide whether ``dir`` has to be visited.

        Returns b'all' when ``dir`` is a root and this is a prefix matcher;
        otherwise True when ``dir`` is a root, a dir, a parent, or lies
        below a root; otherwise False.
        '''
        if dir in self._roots:
            if self._prefix:
                return b'all'
            return True
        if dir in self._dirs or dir in self._parents:
            return True
        return self._underroot(dir)

    @propertycache
    def _allparentschildren(self):
        # Feeding dirs, roots and parents in and then keeping only the
        # entries for parents looks odd, but it covers this case:
        #   dirs = ['foo/bar']
        #   parents = ['foo']
        # Had only self._parents been added, asking for the children of
        # 'foo' could not be answered with ['bar'].
        everything = itertools.chain(self._dirs, self._roots, self._parents)
        return _dirchildren(everything, onlyinclude=self._parents)

    def visitchildrenset(self, dir):
        '''Describe which children of ``dir`` have to be visited.

        Returns b'all', b'this', or a (possibly empty) set of child names.
        '''
        roots = self._roots
        if dir in roots and self._prefix:
            return b'all'
        # The 'dir in self._parents' case from visitdir is deliberately not
        # part of this test; it is dealt with further down.
        if (
            b'' in roots
            or dir in roots
            or dir in self._dirs
            or self._underroot(dir)
        ):
            return b'this'

        if dir not in self._parents:
            return set()
        return self._allparentschildren.get(dir) or set()

    @encoding.strmethod
    def __repr__(self):
        return b'<includematcher includes=%r>' % pycompat.bytestr(self._pats)

725

726

727

class exactmatcher(basematcher):
    r'''Matches the input files exactly. They are interpreted as paths, not
    patterns (so no kind-prefixes).

    >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
    >>> m(b'a.txt')
    True
    >>> m(b'b.txt')
    False

    Input files that would be matched are exactly those returned by .files()
    >>> m.files()
    ['a.txt', 're:.*\\.c$']

    So pattern 're:.*\.c$' is not considered as a regex, but as a file name
    >>> m(b'main.c')
    False
    >>> m(br're:.*\.c$')
    True
    '''

    def __init__(self, files, badfn=None):
        super(exactmatcher, self).__init__(badfn)

        # A list is stored as-is; any other iterable is materialized.
        self._files = files if isinstance(files, list) else list(files)

    # Matching a path is the same question as "is it an exact file?".
    matchfn = basematcher.exact

    @propertycache
    def _dirs(self):
        # Computed once, on first use, from the explicit file set.
        alldirs = pathutil.dirs(self._fileset)
        return set(alldirs)

    def visitdir(self, dir):
        '''Only directories derived from the listed files are visited.'''
        return dir in self._dirs

    def visitchildrenset(self, dir):
        '''Return the names directly below ``dir`` that must be visited.

        The result is an empty set when there are no files at all or
        ``dir`` is not a directory of any listed file.
        '''
        if not self._fileset or dir not in self._dirs:
            return set()

        # Every file plus every directory except the root entry b''.
        names = self._fileset | (self._dirs - {b''})
        if dir != b'':
            # Keep what lives below dir, expressed relative to it.
            under = dir + b'/'
            skip = len(under)
            names = set(n[skip:] for n in names if n.startswith(under))
        # self._dirs holds all directories recursively: for foo/bar/baz.txt
        # it has '', 'foo' and 'foo/bar'. A name that still contains a '/'
        # therefore belongs to a subdir-of-a-subdir and can be dropped; the
        # immediate subdir is present on its own, without a slash.
        children = {n for n in names if b'/' not in n}
        # An empty result is not expected: it would mean _dirs contains
        # something that has no file in _fileset.
        assert children
        return children

    def isexact(self):
        '''This is an exact matcher.'''
        return True

    @encoding.strmethod
    def __repr__(self):
        return b'<exactmatcher files=%r>' % self._files

790

791

792

class differencematcher(basematcher):
    '''Composes two matchers by matching if the first matches and the second
    does not.

    The second matcher's non-matching-attributes (bad, traversedir) are ignored.
    '''

    def __init__(self, m1, m2):
        super(differencematcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        # Only m1's callbacks are used; m2's are ignored.
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    def matchfn(self, f):
        '''Match ``f`` when m1 accepts it and m2 does not.'''
        return self._m1(f) and not self._m2(f)

    @propertycache
    def _files(self):
        if self.isexact():
            return [f for f in self._m1.files() if self(f)]
        # If m1 is not an exact matcher, we can't easily figure out the set of
        # files, because its files() are not always files. For example, if
        # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
        # want to remove "dir" from the set even though it would match m2,
        # because the "dir" in m1 may not be a file.
        return self._m1.files()

    def visitdir(self, dir):
        '''Decide whether ``dir`` has to be visited.

        Returns False when m2 covers all of ``dir``. When m2 does not
        touch ``dir`` at all, m1's verdict is returned unchanged (so it may
        be b'all'). Otherwise m1's verdict is reduced to a plain bool.
        '''
        # Ask m2 once and reuse the answer for both tests below.
        m2_visit = self._m2.visitdir(dir)
        if m2_visit == b'all':
            return False
        m1_visit = self._m1.visitdir(dir)
        if not m2_visit:
            # m2 does not match dir, we can return 'all' here if possible
            return m1_visit
        return bool(m1_visit)

    def visitchildrenset(self, dir):
        '''Describe which children of ``dir`` have to be visited.

        Returns b'all', b'this', or a (possibly empty) set of child names.
        '''
        m2_set = self._m2.visitchildrenset(dir)
        if m2_set == b'all':
            return set()
        m1_set = self._m1.visitchildrenset(dir)
        # Possible values for m1: 'all', 'this', set(...), set()
        # Possible values for m2:        'this', set(...), set()
        # If m2 has nothing under here that we care about, return m1, even if
        # it's 'all'. This is a change in behavior from visitdir, which would
        # return True, not 'all', for some reason.
        if not m2_set:
            return m1_set
        if m1_set in [b'all', b'this']:
            # Never return 'all' here if m2_set is any kind of non-empty (either
            # 'this' or set(foo)), since m2 might return set() for a
            # subdirectory.
            return b'this'
        # Possible values for m1:         set(...), set()
        # Possible values for m2: 'this', set(...)
        # We ignore m2's set results. They're possibly incorrect:
        #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
        #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
        #    return set(), which is *not* correct, we still need to visit 'dir'!
        return m1_set

    def isexact(self):
        '''The difference is exact exactly when m1 is.'''
        return self._m1.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)

859

860

861

def intersectmatchers(m1, m2):
    '''Build a matcher that matches only what both m1 and m2 match.

    The second matcher's non-matching-attributes (bad, traversedir) are
    ignored.

    Shortcuts are taken where possible: if either argument is None the
    other one is handed back, and if either matcher always matches, a
    shallow copy of the other one is returned instead of a composite.
    '''
    if m1 is None or m2 is None:
        return m1 or m2

    if m1.always():
        # m2 alone decides what matches, but the result still has to carry
        # m1's callbacks.
        # TODO: Consider encapsulating these things in a class so there's only
        # one thing to copy from m1.
        result = copy.copy(m2)
        result.bad = m1.bad
        result.traversedir = m1.traversedir
        return result

    if m2.always():
        # m1 alone decides what matches and already carries its own
        # callbacks.
        return copy.copy(m1)

    return intersectionmatcher(m1, m2)

879

880

881

class intersectionmatcher(basematcher):
    '''Matcher that accepts a path only when both m1 and m2 accept it.

    The bad and traversedir callbacks are taken from m1.
    '''

    def __init__(self, m1, m2):
        super(intersectionmatcher, self).__init__()
        self._m1 = m1
        self._m2 = m2
        self.bad = m1.bad
        self.traversedir = m1.traversedir

    @propertycache
    def _files(self):
        if not self.isexact():
            # If neither m1 nor m2 is an exact matcher, we can't easily
            # intersect the set of files, because their files() are not
            # always files. For example, if intersecting a matcher
            # "-I glob:foo.txt" with matcher of "path:dir2", we don't want
            # to remove "dir2" from the set.
            return self._m1.files() + self._m2.files()
        # At least one side is exact: list its files and keep those the
        # other side accepts.
        if self._m1.isexact():
            exact, other = self._m1, self._m2
        else:
            exact, other = self._m2, self._m1
        return [f for f in exact.files() if other(f)]

    def matchfn(self, f):
        '''Match ``f`` when both m1 and m2 accept it.'''
        return self._m1(f) and self._m2(f)

    def visitdir(self, dir):
        '''Decide whether ``dir`` has to be visited.

        When m1 answers b'all', m2's verdict is returned unchanged.
        Otherwise the combined verdict is reduced to a plain bool.
        '''
        first = self._m1.visitdir(dir)
        if first == b'all':
            return self._m2.visitdir(dir)
        if not first:
            return False
        # bool() because visit1=True + visit2='all' should not be 'all'
        return bool(self._m2.visitdir(dir))

    def visitchildrenset(self, dir):
        '''Describe which children of ``dir`` have to be visited.

        Returns b'all', b'this', or a (possibly empty) set of child names.
        '''
        m1_set = self._m1.visitchildrenset(dir)
        if not m1_set:
            return set()
        m2_set = self._m2.visitchildrenset(dir)
        if not m2_set:
            return set()

        # b'all' on one side defers entirely to the other side.
        if m1_set == b'all':
            return m2_set
        if m2_set == b'all':
            return m1_set

        # b'this' on either side cannot be narrowed to a set of names.
        if m1_set == b'this' or m2_set == b'this':
            return b'this'

        assert isinstance(m1_set, set) and isinstance(m2_set, set)
        return m1_set.intersection(m2_set)

    def always(self):
        '''The intersection always matches only when both sides do.'''
        return self._m1.always() and self._m2.always()

    def isexact(self):
        '''The intersection is exact as soon as either side is.'''
        return self._m1.isexact() or self._m2.isexact()

    @encoding.strmethod
    def __repr__(self):
        return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)

940

941

942

class subdirmatcher(basematcher):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> from . import pycompat
    >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'])
    >>> m2 = subdirmatcher(b'sub', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'b.txt')
    True
    >>> m2.matchfn(b'a.txt')
    False
    >>> m2.matchfn(b'b.txt')
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact(b'b.txt')
    True
    >>> def bad(f, msg):
    ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
    >>> m1.bad = bad
    >>> m2.bad(b'x.txt', b'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        super(subdirmatcher, self).__init__()
        self._path = path
        self._matcher = matcher
        self._always = matcher.always()

        # Keep only the wrapped matcher's files below path, made relative
        # to it by stripping the "path/" prefix.
        under = path + b"/"
        skip = len(path) + 1
        self._files = [
            f[skip:] for f in matcher._files if f.startswith(under)
        ]

        # If the parent repo had a path to this subrepo and the matcher is
        # a prefix matcher, this submatcher always matches.
        if matcher.prefix():
            self._always = any(f == path for f in matcher._files)

    def bad(self, f, msg):
        '''Report ``f`` to the wrapped matcher under its full path.'''
        self._matcher.bad(self._path + b"/" + f, msg)

    def matchfn(self, f):
        # The superclass constructor loses some information, so the match
        # function for the subdirectory cannot be rebuilt accurately from
        # the inputs. matchfn() and visitdir() are overridden instead and
        # ask the original matcher with the subdirectory path prepended.
        return self._matcher.matchfn(self._path + b"/" + f)

    def visitdir(self, dir):
        '''Ask the wrapped matcher about ``dir`` below the subdirectory.'''
        # b'' stands for the subdirectory itself.
        full = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitdir(full)

    def visitchildrenset(self, dir):
        '''Ask the wrapped matcher about ``dir`` below the subdirectory.'''
        # b'' stands for the subdirectory itself.
        full = self._path if dir == b'' else self._path + b"/" + dir
        return self._matcher.visitchildrenset(full)

    def always(self):
        '''Return the always-matches flag worked out at construction.'''
        return self._always

    def prefix(self):
        '''A prefix matcher stays one unless it turned into always-matches.'''
        return self._matcher.prefix() and not self._always

    @encoding.strmethod
    def __repr__(self):
        return b'<subdirmatcher path=%r, matcher=%r>' % (
            self._path,
            self._matcher,
        )

1022

1023

1024

class prefixdirmatcher(basematcher):
    """Adapt a matcher to work on a parent directory.

    The matcher's non-matching-attributes (bad, traversedir) are ignored.

    The prefix path should usually be the relative path from the root of
    this matcher to the root of the wrapped matcher.

    >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
    >>> m2 = prefixdirmatcher(b'd/e', m1)
    >>> m2(b'a.txt')
    False
    >>> m2(b'd/e/a.txt')
    True
    >>> m2(b'd/e/b.txt')
    False
    >>> m2.files()
    ['d/e/a.txt', 'd/e/f/b.txt']
    >>> m2.exact(b'd/e/a.txt')
    True
    >>> m2.visitdir(b'd')
    True
    >>> m2.visitdir(b'd/e')
    True
    >>> m2.visitdir(b'd/e/f')
    True
    >>> m2.visitdir(b'd/e/g')
    False
    >>> m2.visitdir(b'd/ef')
    False
    """

    def __init__(self, path, matcher, badfn=None):
        """Wrap ``matcher`` so that it applies below the directory ``path``.

        Raises error.ProgrammingError when ``path`` is empty.
        """
        super(prefixdirmatcher, self).__init__(badfn)
        if not path:
            raise error.ProgrammingError(b'prefix path must not be empty')
        self._matcher = matcher
        self._path = path
        # Precomputed "path/" used for prefix tests and for slicing.
        self._pathprefix = path + b'/'

    @propertycache
    def _files(self):
        """The wrapped matcher's ``_files`` with the path prefix prepended."""
        wrapped = self._matcher._files
        prefix = self._pathprefix
        return [prefix + name for name in wrapped]

    def matchfn(self, f):
        """Match ``f`` only if it lies below the prefix directory."""
        prefix = self._pathprefix
        if f.startswith(prefix):
            return self._matcher.matchfn(f[len(prefix) :])
        return False

    @propertycache
    def _pathdirs(self):
        """Set built from ``pathutil.finddirs`` applied to the prefix path."""
        return set(pathutil.finddirs(self._path))

    def visitdir(self, dir):
        """Translate ``dir`` into the wrapped matcher's namespace.

        The prefix directory itself maps to the wrapped matcher's root
        (``b''``), directories below it are passed on with the prefix
        stripped, and any other directory is visited only if it is a
        member of ``_pathdirs``.
        """
        if dir == self._path:
            return self._matcher.visitdir(b'')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return self._matcher.visitdir(dir[len(prefix) :])
        return dir in self._pathdirs

    def visitchildrenset(self, dir):
        """Children-set counterpart of visitdir().

        Directories in ``_pathdirs`` yield ``b'this'``; directories
        unrelated to the prefix yield an empty set.
        """
        if dir == self._path:
            return self._matcher.visitchildrenset(b'')
        prefix = self._pathprefix
        if dir.startswith(prefix):
            return self._matcher.visitchildrenset(dir[len(prefix) :])
        if dir not in self._pathdirs:
            return set()
        return b'this'

    def isexact(self):
        """Delegate to the wrapped matcher's isexact()."""
        return self._matcher.isexact()

    def prefix(self):
        """Delegate to the wrapped matcher's prefix()."""
        return self._matcher.prefix()

    @encoding.strmethod
    def __repr__(self):
        fields = (pycompat.bytestr(self._path), self._matcher)
        return b'<prefixdirmatcher path=%r, matcher=%r>' % fields

1105

1106

1107

class unionmatcher(basematcher):
    """A matcher that is the union of several matchers.

    The non-matching-attributes (bad, traversedir) are taken from the first
    matcher.
    """

    def __init__(self, matchers):
        """Combine ``matchers``, a non-empty sequence of matcher objects.

        ``matchers[0]`` is read unconditionally, so an empty sequence
        raises IndexError.
        """
        m1 = matchers[0]
        super(unionmatcher, self).__init__()
        self.traversedir = m1.traversedir
        self._matchers = matchers

    def matchfn(self, f):
        """Return True as soon as any of the combined matchers accepts ``f``."""
        for match in self._matchers:
            if match(f):
                return True
        return False

    def visitdir(self, dir):
        """Combine the visitdir() answers of all matchers for ``dir``.

        The first ``b'all'`` answer is returned immediately; otherwise the
        answers are or-ed together, starting from False.
        """
        r = False
        for m in self._matchers:
            v = m.visitdir(dir)
            if v == b'all':
                return v
            r |= v
        return r

    def visitchildrenset(self, dir):
        """Combine the visitchildrenset() answers of all matchers.

        Returns ``b'all'`` if any matcher says so, ``b'this'`` if any
        matcher says ``b'this'`` (and none says ``b'all'``), otherwise the
        union of the sets returned by the individual matchers.
        """
        r = set()
        this = False
        for m in self._matchers:
            v = m.visitchildrenset(dir)
            if not v:
                continue
            if v == b'all':
                return v
            if this or v == b'this':
                this = True
                # don't break, we might have an 'all' in here.
                continue
            assert isinstance(v, set)
            r = r.union(v)
        if this:
            return b'this'
        return r

    @encoding.strmethod
    def __repr__(self):
        # Wrap the operand in a 1-tuple: a bare tuple on the right-hand side
        # of % is unpacked as the argument list, so a tuple of matchers
        # would raise TypeError (or be shown as its only element).
        return b'<unionmatcher matchers=%r>' % (self._matchers,)

1157

1158

1159

def patkind(pattern, default=None):
    r'''If pattern is 'kind:pat' with a known kind, return kind.

    Otherwise ``default`` is returned.

    >>> patkind(br're:.*\.c$')
    're'
    >>> patkind(b'glob:*.c')
    'glob'
    >>> patkind(b'relpath:test.py')
    'relpath'
    >>> patkind(b'main.py')
    >>> patkind(b'main.py', default=b're')
    're'
    '''
    kindandpat = _patsplit(pattern, default)
    return kindandpat[0]

1173

1174

1175

def _patsplit(pattern, default):
    """Split a string into the optional pattern kind prefix and the actual
    pattern.

    Returns ``(kind, pat)`` when the text before the first ``:`` is one of
    ``allpatternkinds``; otherwise ``(default, pattern)``."""
    kind, colon, pat = pattern.partition(b':')
    if colon and kind in allpatternkinds:
        return kind, pat
    return default, pattern

1183

1184

1185

def _globre(pat):
    r'''Convert an extended glob string to a regexp string.

    >>> from . import pycompat
    >>> def bprint(s):
    ...     print(pycompat.sysstr(s))
    >>> bprint(_globre(br'?'))
    .
    >>> bprint(_globre(br'*'))
    [^/]*
    >>> bprint(_globre(br'**'))
    .*
    >>> bprint(_globre(br'**/a'))
    (?:.*/)?a
    >>> bprint(_globre(br'a/**/b'))
    a/(?:.*/)?b
    >>> bprint(_globre(br'[a*?!^][^b][!c]'))
    [a*?!^][\^b][^c]
    >>> bprint(_globre(br'{a,b}'))
    (?:a|b)
    >>> bprint(_globre(br'.\*\?'))
    \.\*\?
    '''
    escape = util.stringutil.regexbytesescapemap.get
    end = len(pat)
    pieces = []  # regexp fragments, joined on return
    depth = 0  # number of '{' groups currently open
    pos = 0
    while pos < end:
        ch = pat[pos : pos + 1]
        pos += 1
        if ch == b'*':
            if pat[pos : pos + 1] != b'*':
                # a single '*' stays within one path component
                pieces.append(b'[^/]*')
            elif pat[pos + 1 : pos + 2] == b'/':
                # '**/' may also match no directory at all
                pos += 2
                pieces.append(b'(?:.*/)?')
            else:
                pos += 1
                pieces.append(b'.*')
        elif ch == b'?':
            pieces.append(b'.')
        elif ch == b'[':
            # Look for the closing ']'; a leading '!' or ']' is part of the
            # character class and cannot terminate it.
            close = pos
            if close < end and pat[close : close + 1] in b'!]':
                close += 1
            while close < end and pat[close : close + 1] != b']':
                close += 1
            if close >= end:
                # unterminated class: treat '[' as a literal
                pieces.append(b'\\[')
            else:
                stuff = pat[pos:close].replace(b'\\', b'\\\\')
                pos = close + 1
                first = stuff[0:1]
                if first == b'!':
                    stuff = b'^' + stuff[1:]
                elif first == b'^':
                    stuff = b'\\' + stuff
                pieces.append(b'[' + stuff + b']')
        elif ch == b'{':
            depth += 1
            pieces.append(b'(?:')
        elif ch == b'}' and depth:
            pieces.append(b')')
            depth -= 1
        elif ch == b',' and depth:
            pieces.append(b'|')
        elif ch == b'\\':
            following = pat[pos : pos + 1]
            if following:
                pos += 1
                pieces.append(escape(following, following))
            else:
                pieces.append(escape(ch, ch))
        else:
            # ordinary byte, or '}' / ',' outside of any group
            pieces.append(escape(ch, ch))
    return b''.join(pieces)

1267

1268

1269

def _regex(kind, pat, globsuffix):
    '''Convert a (normalized) pattern of any kind into a
    regular expression.
    globsuffix is appended to the regexp of globs.

    Raises error.ProgrammingError for kinds that have no regexp form.'''

    if rustmod is not None:
        # Prefer the Rust implementation whenever it has been loaded.
        try:
            return rustmod.build_single_regex(kind, pat, globsuffix)
        except rustmod.PatternError:
            raise error.ProgrammingError(
                b'not a regex pattern: %s:%s' % (kind, pat)
            )

    if not pat and kind in (b'glob', b'relpath'):
        return b''

    if kind == b're':
        return pat

    if kind in (b'path', b'relpath'):
        if pat == b'.':
            return b''
        return util.stringutil.reescape(pat) + b'(?:/|$)'

    if kind == b'rootfilesin':
        # Pattern is a directory name ('.' contributes no prefix).
        dirprefix = b''
        if pat != b'.':
            dirprefix = util.stringutil.reescape(pat) + b'/'
        # Anything after the pattern must be a non-directory.
        return dirprefix + b'[^/]+$'

    if kind == b'relglob':
        globre = _globre(pat)
        star = b'[^/]*'
        if not globre.startswith(star):
            return b'(?:|.*/)' + globre + globsuffix
        # When pat has the form *XYZ (common), make the returned regex more
        # legible by returning the regex for **XYZ instead of **/*XYZ.
        return b'.*' + globre[len(star) :] + globsuffix

    if kind == b'relre':
        return pat if pat.startswith(b'^') else b'.*' + pat

    if kind in (b'glob', b'rootglob'):
        return _globre(pat) + globsuffix

    raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))

1312

1313

1314

def _buildmatch(kindpats, globsuffix, root):
    '''Return regexp string and a matcher function for kindpats.
    globsuffix is appended to the regexp of globs.'''
    matchfuncs = []

    subincludes, kindpats = _expandsubinclude(kindpats, root)
    if subincludes:
        # Matchers for sub-included pattern files, built on first use and
        # remembered per prefix.
        submatchers = {}

        def matchsubinclude(f):
            for prefix, matcherargs in subincludes:
                if not f.startswith(prefix):
                    continue
                submatch = submatchers.get(prefix)
                if submatch is None:
                    submatch = match(*matcherargs)
                    submatchers[prefix] = submatch

                if submatch(f[len(prefix) :]):
                    return True
            return False

        matchfuncs.append(matchsubinclude)

    regex = b''
    if kindpats:
        if not all(k == b'rootfilesin' for k, p, s in kindpats):
            regex, mf = _buildregexmatch(kindpats, globsuffix)
            matchfuncs.append(mf)
        else:
            # Only 'rootfilesin' patterns: a set lookup on the file's
            # directory replaces the regexp.
            dirs = {p for k, p, s in kindpats}

            def mf(f):
                head, slash, _tail = f.rpartition(b'/')
                return (head if slash else b'.') in dirs

            regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
            matchfuncs.append(mf)

    if len(matchfuncs) != 1:
        return regex, lambda f: any(mf(f) for mf in matchfuncs)
    return regex, matchfuncs[0]

1360

1361

1362

# Size limit, in bytes, enforced by _buildregexmatch(): a single pattern
# whose regexp is longer than this aborts, and the joined regexps are split
# into several groups so that no group grows past this limit.
MAX_RE_SIZE = 20000

1362

MAX_RE_SIZE = 20000

1363

1364

1365

def _joinregexes(regexps):

1365

def _joinregexes(regexps):

1366

"""gather multiple regular expressions into a single one"""

1366

"""gather multiple regular expressions into a single one"""

1367

return b'|'.join(regexps)

1367

return b'|'.join(regexps)

1368

1369

1370

def _buildregexmatch(kindpats, globsuffix):
    """Build a match function from a list of kinds and kindpats,
    return regexp string and a matcher function.

    Test too large input
    >>> _buildregexmatch([
    ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
    ... ], b'$')
    Traceback (most recent call last):
    ...
    Abort: matcher pattern is too long (20009 bytes)
    """
    try:
        regexps = [_regex(k, p, globsuffix) for (k, p, s) in kindpats]
        fullregexp = _joinregexes(regexps)

        # Walk the individual regexps and cut them into consecutive chunks
        # whose joined size stays within MAX_RE_SIZE.
        chunks = []
        chunkstart = 0
        chunksize = 0
        for idx, piece in enumerate(regexps):
            piecesize = len(piece)
            if piecesize > MAX_RE_SIZE:
                msg = _(b"matcher pattern is too long (%d bytes)") % piecesize
                raise error.Abort(msg)
            if chunksize + piecesize > MAX_RE_SIZE:
                chunks.append(_joinregexes(regexps[chunkstart:idx]))
                chunkstart = idx
                chunksize = 0
            # the extra byte accounts for the '|' separator
            chunksize += piecesize + 1

        if chunkstart != 0:
            # At least one cut was made: close the last chunk and try the
            # compiled chunks one after the other.
            chunks.append(_joinregexes(regexps[chunkstart:]))
            allmatchers = [_rematcher(g) for g in chunks]
            func = lambda s: any(m(s) for m in allmatchers)
        else:
            matcher = _rematcher(fullregexp)
            func = lambda s: bool(matcher(s))
        return fullregexp, func
    except re.error:
        # Compile the patterns one by one to report the offending one.
        for k, p, s in kindpats:
            try:
                _rematcher(_regex(k, p, globsuffix))
            except re.error:
                if s:
                    msg = _(b"%s: invalid pattern (%s): %s") % (s, k, p)
                else:
                    msg = _(b"invalid pattern (%s): %s") % (k, p)
                raise error.Abort(msg)
        raise error.Abort(_(b"invalid pattern"))

1422

1423

1424

def _patternrootsanddirs(kindpats):

1424

def _patternrootsanddirs(kindpats):

1425

'''Returns roots and directories corresponding to each pattern.

1425

'''Returns roots and directories corresponding to each pattern.

1426

1427

This calculates the roots and directories exactly matching the patterns and

1427

This calculates the roots and directories exactly matching the patterns and

1428

returns a tuple of (roots, dirs) for each. It does not return other

1428

returns a tuple of (roots, dirs) for each. It does not return other

1429

directories which may also need to be considered, like the parent

1429

directories which may also need to be considered, like the parent

1430

directories.

1430

directories.

1431

'''

1431

'''

1432

r = []

1432

r = []

1433

d = []

1433

d = []

1434

for kind, pat, source in kindpats:

1434

for kind, pat, source in kindpats:

1435

if kind in (b'glob', b'rootglob'): # find the non-glob prefix

1435

if kind in (b'glob', b'rootglob'): # find the non-glob prefix

1436

root = []

1436

root = []

1437

for p in pat.split(b'/'):

1437

for p in pat.split(b'/'):

1438

if b'[' in p or b'{' in p or b'*' in p or b'?' in p:

1438

if b'[' in p or b'{' in p or b'*' in p or b'?' in p:

1439

break

1439

break

1440

root.append(p)

1440

root.append(p)

1441

r.append(b'/'.join(root))

1441

r.append(b'/'.join(root))

1442

elif kind in (b'relpath', b'path'):

1442

elif kind in (b'relpath', b'path'):

1443

if pat == b'.':

1443

if pat == b'.':

1444

pat = b''

1444

pat = b''

1445

r.append(pat)

1445

r.append(pat)

1446

elif kind in (b'rootfilesin',):

1446

elif kind in (b'rootfilesin',):

1447

if pat == b'.':

1447

if pat == b'.':

1448

pat = b''

1448

pat = b''

1449

d.append(pat)

1449

d.append(pat)

1450

else: # relglob, re, relre

1450

else: # relglob, re, relre

1451

r.append(b'')

1451

r.append(b'')

1452

return r, d

1452

return r, d

1453

1454

1455

def _roots(kindpats):
    '''Returns root directories to match recursively from the given patterns.'''
    # Only the first element (roots) of the (roots, dirs) pair is wanted.
    rootsanddirs = _patternrootsanddirs(kindpats)
    return rootsanddirs[0]

1459

1460

1461

def _rootsdirsandparents(kindpats):
    '''Returns roots and exact directories from patterns.

    `roots` are directories to match recursively, `dirs` should
    be matched non-recursively, and `parents` are the implicitly required
    directories to walk to items in either roots or dirs.

    Returns a tuple of (roots, dirs, parents).

    >>> r = _rootsdirsandparents(
    ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
    ...      (b'glob', b'g*', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['g/h', 'g/h', ''], []) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    ([], ['g/h', '']) ['', 'g']
    >>> r = _rootsdirsandparents(
    ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
    ...      (b'path', b'', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['r', 'p/p', ''], []) ['', 'p']
    >>> r = _rootsdirsandparents(
    ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
    ...      (b'relre', b'rr', b'')])
    >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
    (['', '', ''], []) ['']
    '''
    roots, dirs = _patternrootsanddirs(kindpats)

    # Add the parents as non-recursive/exact directories, since they must be
    # scanned to get to either the roots or the other exact directories.
    parents = set(pathutil.dirs(dirs))
    parents.update(pathutil.dirs(roots))

    # FIXME: all uses of this function convert these to sets, do so before
    # returning.
    # FIXME: all uses of this function do not need anything in 'roots' and
    # 'dirs' to also be in 'parents', consider removing them before returning.
    return roots, dirs, parents

1503

1504

1505

def _explicitfiles(kindpats):
    '''Returns the potential explicit filenames from the patterns.

    >>> _explicitfiles([(b'path', b'foo/bar', b'')])
    ['foo/bar']
    >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
    []
    '''
    # Keep only the pattern kinds where one can specify filenames (vs only
    # directory names).
    filable = []
    for kindpat in kindpats:
        if kindpat[0] != b'rootfilesin':
            filable.append(kindpat)
    return _roots(filable)

1517

1518

1519

def _prefix(kindpats):

1519

def _prefix(kindpats):

1520

'''Whether all the patterns match a prefix (i.e. recursively)'''

1520

'''Whether all the patterns match a prefix (i.e. recursively)'''

1521

for kind, pat, source in kindpats:

1521

for kind, pat, source in kindpats:

1522

if kind not in (b'path', b'relpath'):

1522

if kind not in (b'path', b'relpath'):

1523

return False

1523

return False

1524

return True

1524

return True

1525

1526

1527

# NOTE(review): module-level slot initialised to None; presumably filled in
# lazily with a compiled comment-matching regexp by readpatternfile() --
# confirm against that function's body.
_commentre = None

1527

_commentre = None

1528

1529

1530

def readpatternfile(filepath, warn, sourceinfo=False):

1530

def readpatternfile(filepath, warn, sourceinfo=False):

1531

'''parse a pattern file, returning a list of

1531

'''parse a pattern file, returning a list of

1532

patterns. These patterns should be given to compile()

1532

patterns. These patterns should be given to compile()

1533

to be validated and converted into a match function.

1533

to be validated and converted into a match function.

1534

1535

trailing white space is dropped.

1535

trailing white space is dropped.

1536

the escape character is backslash.

1536

the escape character is backslash.

1537

comments start with #.

1537

comments start with #.

1538

empty lines are skipped.

1538

empty lines are skipped.

1539

1540

lines can be of the following formats:

1540

lines can be of the following formats:

1541

1542

syntax: regexp # defaults following lines to non-rooted regexps

1542

syntax: regexp # defaults following lines to non-rooted regexps

1543

syntax: glob # defaults following lines to non-rooted globs

1543

syntax: glob # defaults following lines to non-rooted globs

1544

re:pattern # non-rooted regular expression

1544

re:pattern # non-rooted regular expression

1545

glob:pattern # non-rooted glob

1545

glob:pattern # non-rooted glob

1546

rootglob:pat # rooted glob (same root as ^ in regexps)

1546

rootglob:pat # rooted glob (same root as ^ in regexps)

1547

pattern # pattern of the current default type

1547

pattern # pattern of the current default type

1548

1549

if sourceinfo is set, returns a list of tuples:

1549

if sourceinfo is set, returns a list of tuples:

1550

(pattern, lineno, originalline).

1550

(pattern, lineno, originalline).

1551

This is useful to debug ignore patterns.

1551

This is useful to debug ignore patterns.

1552

'''

1552

'''

1553

1554

if rustmod is not None:

1554

if rustmod is not None:

1555

result, warnings = rustmod.read_pattern_file(

1555

result, warnings = rustmod.read_pattern_file(

1556

filepath, bool(warn), sourceinfo,

1556

filepath, bool(warn), sourceinfo,

1557

)

1557

)

1558

1559

for warning_params in warnings:

1559

for warning_params in warnings:

1560

# Can't be easily emitted from Rust, because it would require

1560

# Can't be easily emitted from Rust, because it would require

1561

# a mechanism for both gettext and calling the `warn` function.

1561

# a mechanism for both gettext and calling the `warn` function.

1562

warn(_(b"%s: ignoring invalid syntax '%s'\n") % warning_params)

1562

warn(_(b"%s: ignoring invalid syntax '%s'\n") % warning_params)

1563

1564

return result

1564

return result

1565

1566

syntaxes = {

1566

syntaxes = {

1567

b're': b'relre:',

1567

b're': b'relre:',

1568

b'regexp': b'relre:',

1568

b'regexp': b'relre:',

1569

b'glob': b'relglob:',

1569

b'glob': b'relglob:',

1570

b'rootglob': b'rootglob:',

1570

b'rootglob': b'rootglob:',

1571

b'include': b'include',

1571

b'include': b'include',

1572

b'subinclude': b'subinclude',

1572

b'subinclude': b'subinclude',

1573

}

1573

}

1574

syntax = b'relre:'

1574

syntax = b'relre:'

1575

patterns = []

1575

patterns = []

1576

1577

fp = open(filepath, b'rb')

1577

fp = open(filepath, b'rb')

1578

for lineno, line in enumerate(util.iterfile(fp), start=1):

1578

for lineno, line in enumerate(util.iterfile(fp), start=1):

1579

if b"#" in line:

1579

if b"#" in line:

1580

global _commentre

1580

global _commentre

1581

if not _commentre:

1581

if not _commentre:

1582

_commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')

1582

_commentre = util.re.compile(br'((?:^|[^\\])(?:\\\\)*)#.*')

1583

# remove comments prefixed by an even number of escapes

1583

# remove comments prefixed by an even number of escapes

1584

m = _commentre.search(line)

1584

m = _commentre.search(line)

1585

if m:

1585

if m:

1586

line = line[: m.end(1)]

1586

line = line[: m.end(1)]

1587

# fixup properly escaped comments that survived the above

1587

# fixup properly escaped comments that survived the above

1588

line = line.replace(b"\\#", b"#")

1588

line = line.replace(b"\\#", b"#")

1589

line = line.rstrip()

1589

line = line.rstrip()

1590

if not line:

1590

if not line:

1591

continue

1591

continue

1592

1593

if line.startswith(b'syntax:'):

1593

if line.startswith(b'syntax:'):

1594

s = line[7:].strip()

1594

s = line[7:].strip()

1595

try:

1595

try:

1596

syntax = syntaxes[s]

1596

syntax = syntaxes[s]

1597

except KeyError:

1597

except KeyError:

1598

if warn:

1598

if warn:

1599

warn(

1599

warn(

1600

_(b"%s: ignoring invalid syntax '%s'\n") % (filepath, s)

1600

_(b"%s: ignoring invalid syntax '%s'\n") % (filepath, s)

1601

)

1601

)

1602

continue

1602

continue

1603

1604

linesyntax = syntax

1604

linesyntax = syntax

1605

for s, rels in pycompat.iteritems(syntaxes):

1605

for s, rels in pycompat.iteritems(syntaxes):

1606

if line.startswith(rels):

1606

if line.startswith(rels):

1607

linesyntax = rels

1607

linesyntax = rels

1608

line = line[len(rels) :]

1608

line = line[len(rels) :]

1609

break

1609

break

1610

elif line.startswith(s + b':'):

1610

elif line.startswith(s + b':'):

1611

linesyntax = rels

1611

linesyntax = rels

1612

line = line[len(s) + 1 :]

1612

line = line[len(s) + 1 :]

1613

break

1613

break

1614

if sourceinfo:

1614

if sourceinfo:

1615

patterns.append((linesyntax + line, lineno, line))

1615

patterns.append((linesyntax + line, lineno, line))

1616

else:

1616

else:

1617

patterns.append(linesyntax + line)

1617

patterns.append(linesyntax + line)

1618

fp.close()

1618

fp.close()

1619

return patterns

1619

return patterns

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # match.py - filename matching
             #
             #  Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import, print_function
             import copy
             import itertools
             import os
             import re
             from .i18n import _
             from .pycompat import open
             from . import (
                 encoding,
                 error,
                 pathutil,
                 policy,
                 pycompat,
                 util,
             )
             from .utils import stringutil
             # Rust implementation of pattern-file reading, loaded through the policy
             # module. readpatternfile() delegates to it whenever this is not None
             # (presumably None when the Rust extensions are unavailable -- confirm in
             # policy.importrust).
             rustmod = policy.importrust('filepatterns')
             # Names of the pattern kinds, i.e. the 'kind' part of a 'kind:pattern'
             # string (see the match() docstring for what most of them mean).
             allpatternkinds = (
                 b're',
                 b'glob',
                 b'path',
                 b'relglob',
                 b'relpath',
                 b'relre',
                 b'rootglob',
                 b'listfile',
                 b'listfile0',
                 b'set',
                 b'include',
                 b'subinclude',
                 b'rootfilesin',
             )
             # Kinds whose patterns _donormalize() canonicalizes relative to cwd (via
             # pathutil.canonpath) rather than merely normalizing the path.
             cwdrelativepatternkinds = (b'relpath', b'glob')
             # Local alias; used below as a decorator on matcher properties.
             propertycache = util.propertycache
             def _rematcher(regex):
                 '''Compile ``regex`` through util.re and return a matching callable.

                 The compiled object's ``test_match`` attribute is preferred when it
                 exists; otherwise its ``match`` method is returned.
                 '''
                 compiled = util.re.compile(regex)
                 try:
                     # slightly faster, provided by facebook's re2 bindings
                     matcher = compiled.test_match
                 except AttributeError:
                     matcher = compiled.match
                 return matcher
             def _expandsets(kindpats, ctx=None, listsubrepos=False, badfn=None):
                 '''Returns the kindpats list with the 'set' patterns expanded to matchers'''
                 matchers = []
                 other = []
                 for kind, pat, source in kindpats:
                     if kind == b'set':
                         if ctx is None:
                             raise error.ProgrammingError(
                                 b"fileset expression with no context"
                             )
                         matchers.append(ctx.matchfileset(pat, badfn=badfn))
                         if listsubrepos:
                             for subpath in ctx.substate:
                                 sm = ctx.sub(subpath).matchfileset(pat, badfn=badfn)
                                 pm = prefixdirmatcher(subpath, sm, badfn=badfn)
                                 matchers.append(pm)
                         continue
                     other.append((kind, pat, source))
                 return matchers, other
             def _expandsubinclude(kindpats, root):
                 '''Returns the list of subinclude matcher args and the kindpats without the
                 subincludes in it.'''
                 relmatchers = []
                 other = []
                 for kind, pat, source in kindpats:
                     if kind == b'subinclude':
                         sourceroot = pathutil.dirname(util.normpath(source))
                         pat = util.pconvert(pat)
                         path = pathutil.join(sourceroot, pat)
                         newroot = pathutil.dirname(path)
                         matcherargs = (newroot, b'', [], [b'include:%s' % path])
                         prefix = pathutil.canonpath(root, root, newroot)
                         if prefix:
                             prefix += b'/'
                         relmatchers.append((prefix, matcherargs))
                     else:
                         other.append((kind, pat, source))
                 return relmatchers, other
             def _kindpatsalwaysmatch(kindpats):
                 """"Checks whether the kindspats match everything, as e.g.
                 'relpath:.' does.
                 """
                 for kind, pat, source in kindpats:
                     if pat != b'' or kind not in [b'relpath', b'glob']:
                         return False
                 return True
             def _buildkindpatsmatcher(
                 matchercls, root, kindpats, ctx=None, listsubrepos=False, badfn=None
             ):
                 '''Build one matcher covering ``kindpats``.

                 Fileset ('set') patterns are expanded by _expandsets(); whatever is
                 left is given to ``matchercls``. Returns a nevermatcher when nothing
                 was built, the single matcher when there is exactly one, and a
                 unionmatcher over all of them otherwise.
                 '''
                 filesetmatchers, plainkindpats = _expandsets(
                     kindpats, ctx=ctx, listsubrepos=listsubrepos, badfn=badfn
                 )
                 candidates = []
                 if plainkindpats:
                     candidates.append(matchercls(root, plainkindpats, badfn=badfn))
                 candidates.extend(filesetmatchers)

                 if not candidates:
                     return nevermatcher(badfn=badfn)
                 if len(candidates) == 1:
                     return candidates[0]
                 return unionmatcher(candidates)
             def match(
                 root,
                 cwd,
                 patterns=None,
                 include=None,
                 exclude=None,
                 default=b'glob',
                 auditor=None,
                 ctx=None,
                 listsubrepos=False,
                 warn=None,
                 badfn=None,
                 icasefs=False,
             ):
                 r"""build an object to match a set of file patterns

                 arguments:
                 root - the canonical root of the tree you're matching against
                 cwd - the current working directory, if relevant
                 patterns - patterns to find
                 include - patterns to include (unless they are excluded)
                 exclude - patterns to exclude (even if they are included)
                 default - if a pattern in patterns has no explicit type, assume this one
                 auditor - optional path auditor
                 ctx - optional changecontext
                 listsubrepos - if True, recurse into subrepositories
                 warn - optional function used for printing warnings
                 badfn - optional bad() callback for this matcher instead of the default
                 icasefs - make a matcher for wdir on case insensitive filesystems, which
                     normalizes the given patterns to the case in the filesystem

                 a pattern is one of:
                 'glob:<glob>' - a glob relative to cwd
                 're:<regexp>' - a regular expression
                 'path:<path>' - a path relative to repository root, which is matched
                                 recursively
                 'rootfilesin:<path>' - a path relative to repository root, which is
                                 matched non-recursively (will not match subdirectories)
                 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
                 'relpath:<path>' - a path relative to cwd
                 'relre:<regexp>' - a regexp that needn't match the start of a name
                 'set:<fileset>' - a fileset expression
                 'include:<path>' - a file of patterns to read and include
                 'subinclude:<path>' - a file of patterns to match against files under
                                       the same directory
                 '<something>' - a pattern of the specified default type

                 >>> def _match(root, *args, **kwargs):
                 ...     return match(util.localpath(root), *args, **kwargs)

                 Usually a patternmatcher is returned:
                 >>> _match(b'/foo', b'.', [b're:.*\.c$', b'path:foo/a', b'*.py'])
                 <patternmatcher patterns='.*\\.c$|foo/a(?:/|$)|[^/]*\\.py$'>

                 Combining 'patterns' with 'include' (resp. 'exclude') gives an
                 intersectionmatcher (resp. a differencematcher):
                 >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], include=[b'path:lib']))
                 <class 'mercurial.match.intersectionmatcher'>
                 >>> type(_match(b'/foo', b'.', [b're:.*\.c$'], exclude=[b'path:build']))
                 <class 'mercurial.match.differencematcher'>

                 Notice that, if 'patterns' is empty, an alwaysmatcher is returned:
                 >>> _match(b'/foo', b'.', [])
                 <alwaysmatcher>

                 The 'default' argument determines which kind of pattern is assumed if a
                 pattern has no prefix:
                 >>> _match(b'/foo', b'.', [b'.*\.c$'], default=b're')
                 <patternmatcher patterns='.*\\.c$'>
                 >>> _match(b'/foo', b'.', [b'main.py'], default=b'relpath')
                 <patternmatcher patterns='main\\.py(?:/|$)'>
                 >>> _match(b'/foo', b'.', [b'main.py'], default=b're')
                 <patternmatcher patterns='main.py'>

                 The primary use of matchers is to check whether a value (usually a file
                 name) matches against one of the patterns given at initialization. There
                 are two ways of doing this check.

                 >>> m = _match(b'/foo', b'', [b're:.*\.c$', b'relpath:a'])

                 1. Calling the matcher with a file name returns True if any pattern
                 matches that file name:
                 >>> m(b'a')
                 True
                 >>> m(b'main.c')
                 True
                 >>> m(b'test.py')
                 False

                 2. Using the exact() method only returns True if the file name matches one
                 of the exact patterns (i.e. not re: or glob: patterns):
                 >>> m.exact(b'a')
                 True
                 >>> m.exact(b'main.c')
                 False
                 """
                 assert os.path.isabs(root)
                 # Anchor cwd at root after converting it with util.localpath.
                 cwd = os.path.join(root, util.localpath(cwd))
                 normalize = _donormalize
                 if icasefs:
                     # NOTE(review): this branch dereferences ctx, so icasefs=True
                     # appears to require a non-None ctx -- confirm with callers.
                     dirstate = ctx.repo().dirstate
                     dsnormalize = dirstate.normalize

                     def normalize(patterns, default, root, cwd, auditor, warn):
                         kp = _donormalize(patterns, default, root, cwd, auditor, warn)
                         kindpats = []
                         for kind, pats, source in kp:
                             if kind not in (b're', b'relre'):  # regex can't be normalized
                                 p = pats
                                 pats = dsnormalize(pats)

                                 # Preserve the original to handle a case only rename.
                                 if p != pats and p in dirstate:
                                     kindpats.append((kind, p, source))

                             kindpats.append((kind, pats, source))
                         return kindpats

                 if patterns:
                     kindpats = normalize(patterns, default, root, cwd, auditor, warn)
                     if _kindpatsalwaysmatch(kindpats):
                         m = alwaysmatcher(badfn)
                     else:
                         m = _buildkindpatsmatcher(
                             patternmatcher,
                             root,
                             kindpats,
                             ctx=ctx,
                             listsubrepos=listsubrepos,
                             badfn=badfn,
                         )
                 else:
                     # It's a little strange that no patterns means to match everything.
                     # Consider changing this to match nothing (probably using nevermatcher).
                     m = alwaysmatcher(badfn)
                 # include/exclude patterns always default to the 'glob' kind, and their
                 # matchers are built without the caller's badfn.
                 if include:
                     kindpats = normalize(include, b'glob', root, cwd, auditor, warn)
                     im = _buildkindpatsmatcher(
                         includematcher,
                         root,
                         kindpats,
                         ctx=ctx,
                         listsubrepos=listsubrepos,
                         badfn=None,
                     )
                     m = intersectmatchers(m, im)
                 if exclude:
                     kindpats = normalize(exclude, b'glob', root, cwd, auditor, warn)
                     em = _buildkindpatsmatcher(
                         includematcher,
                         root,
                         kindpats,
                         ctx=ctx,
                         listsubrepos=listsubrepos,
                         badfn=None,
                     )
                     m = differencematcher(m, em)
                 return m
             def exact(files, badfn=None):
                 '''Return an exactmatcher built from ``files``, forwarding ``badfn``.'''
                 matcher = exactmatcher(files, badfn=badfn)
                 return matcher
             def always(badfn=None):
                 '''Return an alwaysmatcher, forwarding ``badfn``.'''
                 matcher = alwaysmatcher(badfn)
                 return matcher
             def never(badfn=None):
                 '''Return a nevermatcher, forwarding ``badfn``.'''
                 matcher = nevermatcher(badfn)
                 return matcher
             def badmatch(match, badfn):
                 """Return a shallow copy of ``match`` whose ``bad`` attribute is
                 ``badfn``; the matcher passed in is left untouched.
                 """
                 clone = copy.copy(match)
                 setattr(clone, 'bad', badfn)
                 return clone
             def _donormalize(patterns, default, root, cwd, auditor=None, warn=None):
                 '''Convert 'kind:pat' from the patterns list to tuples with kind and
                 normalized and rooted patterns and with listfiles expanded.'''
                 kindpats = []
                 for kind, pat in [_patsplit(p, default) for p in patterns]:
                     if kind in cwdrelativepatternkinds:
                         pat = pathutil.canonpath(root, cwd, pat, auditor=auditor)
                     elif kind in (b'relglob', b'path', b'rootfilesin', b'rootglob'):
                         pat = util.normpath(pat)
                     elif kind in (b'listfile', b'listfile0'):
                         try:
                             files = util.readfile(pat)
                             if kind == b'listfile0':
                                 files = files.split(b'\0')
                             else:
                                 files = files.splitlines()
                             files = [f for f in files if f]
                         except EnvironmentError:
                             raise error.Abort(_(b"unable to read file list (%s)") % pat)
                         for k, p, source in _donormalize(
                             files, default, root, cwd, auditor, warn
                         ):
                             kindpats.append((k, p, pat))
                         continue
                     elif kind == b'include':
                         try:
                             fullpath = os.path.join(root, util.localpath(pat))
                             includepats = readpatternfile(fullpath, warn)
                             for k, p, source in _donormalize(
                                 includepats, default, root, cwd, auditor, warn
                             ):
                                 kindpats.append((k, p, source or pat))
                         except error.Abort as inst:
                             raise error.Abort(
                                 b'%s: %s'
                                 % (pat, inst[0])  # pytype: disable=unsupported-operands
                             )
                         except IOError as inst:
                             if warn:
                                 warn(
                                     _(b"skipping unreadable pattern file '%s': %s\n")
                                     % (pat, stringutil.forcebytestr(inst.strerror))
                                 )
                         continue
                     # else: re or relre - which cannot be normalized
                     kindpats.append((kind, pat, b''))
                 return kindpats
             class basematcher(object):
                 '''Base class of the matchers; on its own it matches no file name.'''

                 def __init__(self, badfn=None):
                     # Only shadow the class-level bad() when a callback was supplied.
                     if badfn is not None:
                         self.bad = badfn

                 def __call__(self, fn):
                     return self.matchfn(fn)

                 # Callbacks related to how the matcher is used by dirstate.walk.
                 # Subscribers to these events must monkeypatch the matcher object.
                 def bad(self, f, msg):
                     '''Callback from dirstate.walk for each explicit file that can't be
                     found/accessed, with an error message.'''

                 # If a traversedir is set, it will be called when a directory
                 # discovered by recursive traversal is visited.
                 traversedir = None

                 @propertycache
                 def _files(self):
                     return []

                 def files(self):
                     '''Explicitly listed files or patterns or roots:
                     if no patterns or .always(): empty list,
                     if exact: list exact files,
                     if not .anypats(): list all files and dirs,
                     else: optimal roots'''
                     return self._files

                 @propertycache
                 def _fileset(self):
                     return set(self._files)

                 def exact(self, f):
                     '''Returns True if f is in .files().'''
                     return f in self._fileset

                 def matchfn(self, f):
                     return False

                 def visitdir(self, dir):
                     '''Tell whether ``dir`` should be visited, i.e. whether it or one
                     of its subdirectories may hold matches. The decision is based on
                     the match's primary, included, and excluded patterns.

                     Returns the string 'all' if the given directory and all
                     subdirectories should be visited. Otherwise returns True or False
                     indicating whether the given directory should be visited.
                     '''
                     return True

                 def visitchildrenset(self, dir):
                     '''Like visitdir(), but may also name the children worth visiting.

                     Decides whether a directory should be visited based on whether it
                     has potential matches in it or one of its subdirectories, and
                     potentially lists which subdirectories of that directory should be
                     visited. This is based on the match's primary, included, and
                     excluded patterns.

                     The results map onto those of 'visitdir' as follows:

                          visitdir | visitchildrenset
                         ----------+-------------------
                          False    | set()
                          'all'    | 'all'
                          True     | 'this' OR non-empty set of subdirs -or files- to visit

                     Example:
                       Assume matchers ['path:foo/bar', 'rootfilesin:qux'], we would
                       return the following values (assuming the implementation of
                       visitchildrenset is capable of recognizing this; some
                       implementations are not).

                       '' -> {'foo', 'qux'}
                       'baz' -> set()
                       'foo' -> {'bar'}
                       # Ideally this would be 'all', but since the prefix nature of
                       # matchers is applied to the entire matcher, we have to downgrade
                       # this to 'this' due to the non-prefix 'rootfilesin'-kind matcher
                       # being mixed in.
                       'foo/bar' -> 'this'
                       'qux' -> 'this'

                     Important:
                       Most matchers do not know if they're representing files or
                       directories. They see ['path:dir/f'] and don't know whether 'f'
                       is a file or a directory, so visitchildrenset('dir') for most
                       matchers will return {'f'}, but if the matcher knows it's a file
                       (like exactmatcher does), it may return 'this'. Do not rely on
                       the return being a set indicating that there are no files in this
                       dir to investigate (or equivalently that if there are files to
                       investigate in 'dir' that it will always return 'this').
                     '''
                     return b'this'

                 def always(self):
                     '''Matcher will match everything and .files() will be empty --
                     optimization might be possible.'''
                     return False

                 def isexact(self):
                     '''Matcher will match exactly the list of files in .files() --
                     optimization might be possible.'''
                     return False

                 def prefix(self):
                     '''Matcher will match the paths in .files() recursively --
                     optimization might be possible.'''
                     return False

                 def anypats(self):
                     '''None of .always(), .isexact(), and .prefix() is true --
                     optimizations will be difficult.'''
                     return not (self.always() or self.isexact() or self.prefix())
             class alwaysmatcher(basematcher):
                 '''Matcher that accepts every file and asks for every directory.'''

                 def __init__(self, badfn=None):
                     super(alwaysmatcher, self).__init__(badfn)

                 def __repr__(self):
                     return r'<alwaysmatcher>'

                 def matchfn(self, f):
                     return True

                 def always(self):
                     return True

                 def visitchildrenset(self, dir):
                     return b'all'

                 def visitdir(self, dir):
                     return b'all'
             class nevermatcher(basematcher):
                 '''Matcher that accepts no file and visits no directory.'''

                 def __init__(self, badfn=None):
                     super(nevermatcher, self).__init__(badfn)

                 def __repr__(self):
                     return r'<nevermatcher>'

                 # It's a little weird to say that the nevermatcher is an exact matcher
                 # or a prefix matcher, but it seems to make sense to let callers take
                 # fast paths based on either. There will be no exact matches, nor any
                 # prefixes (files() returns []), so fast paths iterating over them should
                 # be efficient (and correct).
                 def isexact(self):
                     return True

                 def prefix(self):
                     return True

                 def visitchildrenset(self, dir):
                     return set()

                 def visitdir(self, dir):
                     return False
             class predicatematcher(basematcher):
                 """Matcher whose matchfn is the boolean function ``predfn``.

                 ``predrepr``, when given, is what __repr__ describes the predicate
                 with (through stringutil.buildrepr); otherwise the byte repr of the
                 function itself is used.
                 """

                 def __init__(self, predfn, predrepr=None, badfn=None):
                     super(predicatematcher, self).__init__(badfn)
                     self._predrepr = predrepr
                     self.matchfn = predfn

                 @encoding.strmethod
                 def __repr__(self):
                     described = stringutil.buildrepr(self._predrepr)
                     if not described:
                         described = pycompat.byterepr(self.matchfn)
                     return b'<predicatenmatcher pred=%s>' % described
             class patternmatcher(basematcher):
                 r"""Matches a set of (kind, pat, source) against a 'root' directory.
                 >>> kindpats = [
                 ...     (b're', br'.*\.c$', b''),
                 ...     (b'path', b'foo/a', b''),
                 ...     (b'relpath', b'b', b''),
                 ...     (b'glob', b'*.h', b''),
                 ... ]
                 >>> m = patternmatcher(b'foo', kindpats)
                 >>> m(b'main.c')  # matches re:.*\.c$
                 True
                 >>> m(b'b.txt')
                 False
                 >>> m(b'foo/a')  # matches path:foo/a
                 True
                 >>> m(b'a')  # does not match path:b, since 'root' is 'foo'
                 False
                 >>> m(b'b')  # matches relpath:b, since 'root' is 'foo'
                 True
                 >>> m(b'lib.h')  # matches glob:*.h
                 True
                 >>> m.files()
                 ['', 'foo/a', 'b', '']
                 >>> m.exact(b'foo/a')
                 True
                 >>> m.exact(b'b')
                 True
                 >>> m.exact(b'lib.h')  # exact matches are for (rel)path kinds
                 False
                 """
                 def __init__(self, root, kindpats, badfn=None):
                     super(patternmatcher, self).__init__(badfn)
                     self._files = _explicitfiles(kindpats)
                     self._prefix = _prefix(kindpats)
                     self._pats, self.matchfn = _buildmatch(kindpats, b'$', root)
                 @propertycache
                 def _dirs(self):
                     return set(pathutil.dirs(self._fileset))
                 def visitdir(self, dir):
                     """Tell whether ``dir`` needs to be visited.
                     Returns b'all' when ``dir`` is one of the explicit files and the
                     prefix flag is set; otherwise a boolean.
                     """
                     fileset = self._fileset
                     if dir in fileset:
                         # An explicitly listed entry: everything below it is wanted when
                         # the prefix flag is set.
                         return b'all' if self._prefix else True
                     if dir in self._dirs:
                         return True
                     # Visit as well when any directory yielded by finddirs(dir) was
                     # explicitly listed.
                     return any(
                         ancestor in fileset for ancestor in pathutil.finddirs(dir)
                     )
                 def visitchildrenset(self, dir):
                     """Translate the result of ``visitdir(dir)`` into a children-set answer.
                     Falsy -> empty set, True -> b'this', b'all' -> b'all'.
                     """
                     visit = self.visitdir(dir)
                     if not visit:
                         return set()
                     if visit is True:
                         return b'this'
                     # The only remaining value visitdir is expected to produce.
                     assert visit == b'all'
                     return b'all'
                 def prefix(self):
                     """Return the value computed by ``_prefix(kindpats)`` in ``__init__``."""
                     return self._prefix
                 @encoding.strmethod
                 def __repr__(self):
                     """Describe this matcher by the pattern string from ``_buildmatch``."""
                     shown = pycompat.bytestr(self._pats)
                     return b'<patternmatcher patterns=%r>' % shown
             # This is basically a reimplementation of pathutil.dirs that stores the
             # children instead of just a count of them, plus a small optional optimization
             # to avoid some directories we don't need.
             class _dirchildren(object):
                 """Map each directory to the set of names of its immediate children.
                 ``paths`` is an iterable of slash-separated byte paths. ``onlyinclude``,
                 when given, is a collection of directory names; children are recorded
                 only for directories it contains. When it is None (the default) no
                 restriction is applied and every directory is recorded.
                 """
                 def __init__(self, paths, onlyinclude=None):
                     self._dirs = {}
                     # Keep None distinct from an empty collection: None means "record
                     # everything", while an empty collection means "record nothing".
                     self._onlyinclude = onlyinclude
                     addpath = self.addpath
                     for f in paths:
                         addpath(f)
                 def addpath(self, path):
                     """Record every (directory, child) pair on the way up from ``path``.
                     The empty path is ignored.
                     """
                     if path == b'':
                         return
                     dirs = self._dirs
                     onlyinclude = self._onlyinclude
                     findsplitdirs = _dirchildren._findsplitdirs
                     for d, b in findsplitdirs(path):
                         if onlyinclude is not None and d not in onlyinclude:
                             continue
                         dirs.setdefault(d, set()).add(b)
                 @staticmethod
                 def _findsplitdirs(path):
                     # yields (dirname, basename) tuples, walking back to the root.  This is
                     # very similar to pathutil.finddirs, except:
                     #  - produces a (dirname, basename) tuple, not just 'dirname'
                     # Unlike manifest._splittopdir, this does not suffix `dirname` with a
                     # slash.
                     oldpos = len(path)
                     pos = path.rfind(b'/')
                     while pos != -1:
                         yield path[:pos], path[pos + 1 : oldpos]
                         oldpos = pos
                         pos = path.rfind(b'/', 0, pos)
                     # The top-level component hangs off the root directory b''.
                     yield b'', path[:oldpos]
                 def get(self, path):
                     """Return the recorded children of ``path`` (an empty set if none)."""
                     return self._dirs.get(path, set())
             class includematcher(basematcher):
                 """Matcher built from include patterns.
                 Directory-visiting decisions are driven by the three directory
                 collections returned by ``_rootsdirsandparents(kindpats)``.
                 """
                 def __init__(self, root, kindpats, badfn=None):
                     super(includematcher, self).__init__(badfn)
                     # b'(?:/|$)' is the suffix _buildmatch appends to the regexp of globs.
                     pats, matchfn = _buildmatch(kindpats, b'(?:/|$)', root)
                     self._pats = pats
                     self.matchfn = matchfn
                     self._prefix = _prefix(kindpats)
                     roots, dirs, parents = _rootsdirsandparents(kindpats)
                     # Directories that are included recursively.
                     self._roots = set(roots)
                     # Directories that are included non-recursively.
                     self._dirs = set(dirs)
                     # Directories included non-recursively only because they lead to
                     # something in _dirs or _roots.
                     self._parents = parents
                 def visitdir(self, dir):
                     """Return b'all' or a boolean telling whether ``dir`` must be visited."""
                     roots = self._roots
                     if dir in roots:
                         return b'all' if self._prefix else True
                     if dir in self._dirs or dir in self._parents:
                         return True
                     # Also visit when a directory yielded by finddirs(dir) is a root.
                     return any(
                         ancestor in roots for ancestor in pathutil.finddirs(dir)
                     )
                 @propertycache
                 def _allparentschildren(self):
                     # Dirs, roots and parents are all fed in, but only entries keyed by
                     # a parent are kept. Feeding in just the parents would lose
                     # information, e.g. with:
                     #   dirs = ['foo/bar']
                     #   parents = ['foo']
                     # asking for the children of 'foo' could not answer ['bar'].
                     everything = itertools.chain(self._dirs, self._roots, self._parents)
                     return _dirchildren(everything, onlyinclude=self._parents)
                 def visitchildrenset(self, dir):
                     """Return b'all', b'this', or the set of children of ``dir`` to visit."""
                     roots = self._roots
                     isroot = dir in roots
                     if isroot and self._prefix:
                         return b'all'
                     # The 'dir in self._parents' case from visitdir is deliberately not
                     # part of this test; it is dealt with afterwards.
                     if (
                         isroot
                         or b'' in roots
                         or dir in self._dirs
                         or any(
                             ancestor in roots for ancestor in pathutil.finddirs(dir)
                         )
                     ):
                         return b'this'
                     if dir not in self._parents:
                         return set()
                     children = self._allparentschildren.get(dir)
                     return children if children else set()
                 @encoding.strmethod
                 def __repr__(self):
                     shown = pycompat.bytestr(self._pats)
                     return b'<includematcher includes=%r>' % shown
             class exactmatcher(basematcher):
                 r'''Matches the given files exactly. Each entry is taken as a literal
                 path, never as a pattern (kind-prefixes have no special meaning).
                 >>> m = exactmatcher([b'a.txt', br're:.*\.c$'])
                 >>> m(b'a.txt')
                 True
                 >>> m(b'b.txt')
                 False
                 Input files that would be matched are exactly those returned by .files()
                 >>> m.files()
                 ['a.txt', 're:.*\\.c$']
                 So pattern 're:.*\.c$' is not considered as a regex, but as a file name
                 >>> m(b'main.c')
                 False
                 >>> m(br're:.*\.c$')
                 True
                 '''
                 def __init__(self, files, badfn=None):
                     super(exactmatcher, self).__init__(badfn)
                     # A list is stored as given; any other iterable is copied into one.
                     self._files = files if isinstance(files, list) else list(files)
                 matchfn = basematcher.exact
                 @propertycache
                 def _dirs(self):
                     founddirs = pathutil.dirs(self._fileset)
                     return set(founddirs)
                 def visitdir(self, dir):
                     return dir in self._dirs
                 def visitchildrenset(self, dir):
                     """Return the names directly under ``dir`` that need visiting."""
                     if not self._fileset or dir not in self._dirs:
                         return set()
                     candidates = self._fileset | (self._dirs - {b''})
                     if dir != b'':
                         # Keep only entries below ``dir`` and make them relative to it.
                         dirprefix = dir + b'/'
                         skip = len(dirprefix)
                         candidates = {
                             c[skip:] for c in candidates if c.startswith(dirprefix)
                         }
                     # self._dirs holds every directory level: for foo/bar/baz.txt it
                     # contains '', 'foo' and 'foo/bar'. A candidate that still has a
                     # '/' therefore belongs to a deeper level and can be dropped; its
                     # immediate subdirectory is present on its own, without a slash.
                     children = {c for c in candidates if b'/' not in c}
                     # An empty result is not expected: it would mean _dirs has an entry
                     # with no corresponding file in _fileset.
                     assert children
                     return children
                 def isexact(self):
                     return True
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<exactmatcher files=%r>' % self._files
             class differencematcher(basematcher):
                 '''Composes two matchers by matching if the first matches and the second
                 does not.
                 The second matcher's non-matching-attributes (bad, traversedir) are ignored.
                 '''
                 def __init__(self, m1, m2):
                     super(differencematcher, self).__init__()
                     self._m1 = m1
                     self._m2 = m2
                     # The bad and traversedir hooks come from m1 only.
                     self.bad = m1.bad
                     self.traversedir = m1.traversedir
                 def matchfn(self, f):
                     """Match ``f`` when m1 matches it and m2 does not."""
                     return self._m1(f) and not self._m2(f)
                 @propertycache
                 def _files(self):
                     if self.isexact():
                         return [f for f in self._m1.files() if self(f)]
                     # If m1 is not an exact matcher, we can't easily figure out the set of
                     # files, because its files() are not always files. For example, if
                     # m1 is "path:dir" and m2 is "rootfilesin:.", we don't
                     # want to remove "dir" from the set even though it would match m2,
                     # because the "dir" in m1 may not be a file.
                     return self._m1.files()
                 def visitdir(self, dir):
                     """Return False, True or b'all' for ``dir``.
                     b'all' is only passed through from m1 when m2 does not want to
                     visit ``dir`` at all.
                     """
                     # Ask m2 once and reuse the answer for both tests below.
                     m2visit = self._m2.visitdir(dir)
                     if m2visit == b'all':
                         return False
                     if not m2visit:
                         # m2 does not match dir, we can return 'all' here if possible
                         return self._m1.visitdir(dir)
                     return bool(self._m1.visitdir(dir))
                 def visitchildrenset(self, dir):
                     """Return b'all', b'this' or a set of children of ``dir`` to visit."""
                     m2_set = self._m2.visitchildrenset(dir)
                     if m2_set == b'all':
                         return set()
                     m1_set = self._m1.visitchildrenset(dir)
                     # Possible values for m1: 'all', 'this', set(...), set()
                     # Possible values for m2:        'this', set(...), set()
                     # If m2 has nothing under here that we care about, return m1, even if
                     # it's 'all'. This is a change in behavior from visitdir, which would
                     # return True, not 'all', for some reason.
                     if not m2_set:
                         return m1_set
                     if m1_set in [b'all', b'this']:
                         # Never return 'all' here if m2_set is any kind of non-empty (either
                         # 'this' or set(foo)), since m2 might return set() for a
                         # subdirectory.
                         return b'this'
                     # Possible values for m1:         set(...), set()
                     # Possible values for m2: 'this', set(...)
                     # We ignore m2's set results. They're possibly incorrect:
                     #  m1 = path:dir/subdir, m2=rootfilesin:dir, visitchildrenset(''):
                     #    m1 returns {'dir'}, m2 returns {'dir'}, if we subtracted we'd
                     #    return set(), which is *not* correct, we still need to visit 'dir'!
                     return m1_set
                 def isexact(self):
                     return self._m1.isexact()
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<differencematcher m1=%r, m2=%r>' % (self._m1, self._m2)
             def intersectmatchers(m1, m2):
                 '''Return a matcher that matches only what both m1 and m2 match.
                 When either argument is None the result is ``m1 or m2``. When one side
                 always matches, a shallow copy of the other side is returned instead of
                 building an intersectionmatcher.
                 The second matcher's non-matching-attributes (bad, traversedir) are ignored.
                 '''
                 if m1 is None or m2 is None:
                     return m1 or m2
                 if m1.always():
                     # m2 alone decides what matches, but the hooks still come from m1.
                     # TODO: Consider encapsulating these things in a class so there's only
                     # one thing to copy from m1.
                     clone = copy.copy(m2)
                     clone.bad = m1.bad
                     clone.traversedir = m1.traversedir
                     return clone
                 if m2.always():
                     return copy.copy(m1)
                 return intersectionmatcher(m1, m2)
             class intersectionmatcher(basematcher):
                 """Matcher that matches a file only when both wrapped matchers match it.
                 The bad and traversedir attributes are taken from the first matcher.
                 """
                 def __init__(self, m1, m2):
                     super(intersectionmatcher, self).__init__()
                     self._m1 = m1
                     self._m2 = m2
                     self.bad = m1.bad
                     self.traversedir = m1.traversedir
                 @propertycache
                 def _files(self):
                     if not self.isexact():
                         # If neither m1 nor m2 is an exact matcher, we can't easily
                         # intersect the set of files, because their files() are not
                         # always files. For example, if intersecting a matcher
                         # "-I glob:foo.txt" with matcher of "path:dir2", we don't want
                         # to remove "dir2" from the set.
                         return self._m1.files() + self._m2.files()
                     # Filter the exact side's file list through the other matcher.
                     if self._m1.isexact():
                         exact, other = self._m1, self._m2
                     else:
                         exact, other = self._m2, self._m1
                     return [f for f in exact.files() if other(f)]
                 def matchfn(self, f):
                     return self._m1(f) and self._m2(f)
                 def visitdir(self, dir):
                     first = self._m1.visitdir(dir)
                     if not first:
                         return False
                     second = self._m2.visitdir(dir)
                     if first == b'all':
                         # m1 wants everything, so m2's answer is passed through as is.
                         return second
                     # bool() because first=True + second='all' should not be 'all'
                     return bool(second)
                 def visitchildrenset(self, dir):
                     left = self._m1.visitchildrenset(dir)
                     if not left:
                         return set()
                     right = self._m2.visitchildrenset(dir)
                     if not right:
                         return set()
                     # An 'all' on one side leaves the decision to the other side.
                     if left == b'all':
                         return right
                     if right == b'all':
                         return left
                     if b'this' in (left, right):
                         return b'this'
                     assert isinstance(left, set) and isinstance(right, set)
                     return left.intersection(right)
                 def always(self):
                     return self._m1.always() and self._m2.always()
                 def isexact(self):
                     return self._m1.isexact() or self._m2.isexact()
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<intersectionmatcher m1=%r, m2=%r>' % (self._m1, self._m2)
             class subdirmatcher(basematcher):
                 """Adapt a matcher to work on a subdirectory only.
                 Paths given to this matcher are relative to the subdirectory; the
                 subdirectory path is inserted or removed as needed:
                 >>> from . import pycompat
                 >>> m1 = match(util.localpath(b'/root'), b'', [b'a.txt', b'sub/b.txt'])
                 >>> m2 = subdirmatcher(b'sub', m1)
                 >>> m2(b'a.txt')
                 False
                 >>> m2(b'b.txt')
                 True
                 >>> m2.matchfn(b'a.txt')
                 False
                 >>> m2.matchfn(b'b.txt')
                 True
                 >>> m2.files()
                 ['b.txt']
                 >>> m2.exact(b'b.txt')
                 True
                 >>> def bad(f, msg):
                 ...     print(pycompat.sysstr(b"%s: %s" % (f, msg)))
                 >>> m1.bad = bad
                 >>> m2.bad(b'x.txt', b'No such file')
                 sub/x.txt: No such file
                 """
                 def __init__(self, path, matcher):
                     super(subdirmatcher, self).__init__()
                     self._path = path
                     self._matcher = matcher
                     self._always = matcher.always()
                     # Keep the wrapped matcher's files that live below ``path``, made
                     # relative to it.
                     dirprefix = path + b"/"
                     skip = len(dirprefix)
                     self._files = [
                         f[skip:] for f in matcher._files if f.startswith(dirprefix)
                     ]
                     # If the parent repo had a path to this subrepo and the matcher is
                     # a prefix matcher, this submatcher always matches.
                     if matcher.prefix():
                         self._always = any(f == path for f in matcher._files)
                 def bad(self, f, msg):
                     fullpath = self._path + b"/" + f
                     self._matcher.bad(fullpath, msg)
                 def matchfn(self, f):
                     # The superclass constructor loses some information, so an accurate
                     # match function for the subdirectory cannot be rebuilt from the
                     # inputs. matchfn() and visitdir() are overridden instead and call
                     # the original matcher with the subdirectory path prepended.
                     fullpath = self._path + b"/" + f
                     return self._matcher.matchfn(fullpath)
                 def visitdir(self, dir):
                     # b'' stands for the subdirectory itself.
                     target = self._path if dir == b'' else self._path + b"/" + dir
                     return self._matcher.visitdir(target)
                 def visitchildrenset(self, dir):
                     # b'' stands for the subdirectory itself.
                     target = self._path if dir == b'' else self._path + b"/" + dir
                     return self._matcher.visitchildrenset(target)
                 def always(self):
                     return self._always
                 def prefix(self):
                     return self._matcher.prefix() and not self._always
                 @encoding.strmethod
                 def __repr__(self):
                     details = (self._path, self._matcher)
                     return b'<subdirmatcher path=%r, matcher=%r>' % details
             class prefixdirmatcher(basematcher):
                 """Adapt a matcher to work on a parent directory.
                 The matcher's non-matching-attributes (bad, traversedir) are ignored.
                 The prefix path should usually be the relative path from the root of
                 this matcher to the root of the wrapped matcher.
                 >>> m1 = match(util.localpath(b'/root/d/e'), b'f', [b'../a.txt', b'b.txt'], auditor=lambda name: None)
                 >>> m2 = prefixdirmatcher(b'd/e', m1)
                 >>> m2(b'a.txt')
                 False
                 >>> m2(b'd/e/a.txt')
                 True
                 >>> m2(b'd/e/b.txt')
                 False
                 >>> m2.files()
                 ['d/e/a.txt', 'd/e/f/b.txt']
                 >>> m2.exact(b'd/e/a.txt')
                 True
                 >>> m2.visitdir(b'd')
                 True
                 >>> m2.visitdir(b'd/e')
                 True
                 >>> m2.visitdir(b'd/e/f')
                 True
                 >>> m2.visitdir(b'd/e/g')
                 False
                 >>> m2.visitdir(b'd/ef')
                 False
                 """
                 def __init__(self, path, matcher, badfn=None):
                     super(prefixdirmatcher, self).__init__(badfn)
                     if not path:
                         raise error.ProgrammingError(b'prefix path must not be empty')
                     self._path = path
                     # Stored with the trailing slash so prefix tests stop at a path
                     # component boundary.
                     self._pathprefix = path + b'/'
                     self._matcher = matcher
                 @propertycache
                 def _files(self):
                     prefix = self._pathprefix
                     return [prefix + f for f in self._matcher._files]
                 def matchfn(self, f):
                     prefix = self._pathprefix
                     if f.startswith(prefix):
                         return self._matcher.matchfn(f[len(prefix) :])
                     return False
                 @propertycache
                 def _pathdirs(self):
                     founddirs = pathutil.finddirs(self._path)
                     return set(founddirs)
                 def visitdir(self, dir):
                     inner = self._matcher
                     if dir == self._path:
                         # The prefix directory itself is the wrapped matcher's root.
                         return inner.visitdir(b'')
                     prefix = self._pathprefix
                     if dir.startswith(prefix):
                         return inner.visitdir(dir[len(prefix) :])
                     return dir in self._pathdirs
                 def visitchildrenset(self, dir):
                     inner = self._matcher
                     if dir == self._path:
                         # The prefix directory itself is the wrapped matcher's root.
                         return inner.visitchildrenset(b'')
                     prefix = self._pathprefix
                     if dir.startswith(prefix):
                         return inner.visitchildrenset(dir[len(prefix) :])
                     return b'this' if dir in self._pathdirs else set()
                 def isexact(self):
                     return self._matcher.isexact()
                 def prefix(self):
                     return self._matcher.prefix()
                 @encoding.strmethod
                 def __repr__(self):
                     details = (pycompat.bytestr(self._path), self._matcher)
                     return b'<prefixdirmatcher path=%r, matcher=%r>' % details
             class unionmatcher(basematcher):
                 """A matcher that matches whatever any of several matchers matches.
                 The traversedir attribute is taken from the first matcher.
                 NOTE(review): ``bad`` is not copied from the first matcher here, unlike
                 in differencematcher and intersectionmatcher - confirm this is intended.
                 """
                 def __init__(self, matchers):
                     first = matchers[0]
                     super(unionmatcher, self).__init__()
                     self.traversedir = first.traversedir
                     self._matchers = matchers
                 def matchfn(self, f):
                     return any(match(f) for match in self._matchers)
                 def visitdir(self, dir):
                     combined = False
                     for m in self._matchers:
                         visit = m.visitdir(dir)
                         if visit == b'all':
                             # One matcher wanting everything settles the answer.
                             return visit
                         combined = combined | visit
                     return combined
                 def visitchildrenset(self, dir):
                     children = set()
                     sawthis = False
                     for m in self._matchers:
                         visit = m.visitchildrenset(dir)
                         if not visit:
                             continue
                         if visit == b'all':
                             return visit
                         if visit == b'this':
                             # Keep looping: a later matcher may still answer 'all'.
                             sawthis = True
                         elif not sawthis:
                             assert isinstance(visit, set)
                             children = children.union(visit)
                     return b'this' if sawthis else children
                 @encoding.strmethod
                 def __repr__(self):
                     return b'<unionmatcher matchers=%r>' % self._matchers
             def patkind(pattern, default=None):
                 r'''Return the kind of a 'kind:pat' pattern whose kind is known.
                 ``default`` is returned when the pattern has no known kind prefix.
                 >>> patkind(br're:.*\.c$')
                 're'
                 >>> patkind(b'glob:*.c')
                 'glob'
                 >>> patkind(b'relpath:test.py')
                 'relpath'
                 >>> patkind(b'main.py')
                 >>> patkind(b'main.py', default=b're')
                 're'
                 '''
                 kind, _rest = _patsplit(pattern, default)
                 return kind
             def _patsplit(pattern, default):
                 """Return a (kind, pattern) pair for ``pattern``.
                 The text before the first b':' is used as the kind only when it is one
                 of ``allpatternkinds``; otherwise ``(default, pattern)`` is returned with
                 the pattern left untouched."""
                 kind, colon, pat = pattern.partition(b':')
                 if colon and kind in allpatternkinds:
                     return kind, pat
                 return default, pattern
             def _globre(pat):
                 r'''Convert an extended glob string to a regexp string.
                 The pattern is scanned one byte at a time; bytes with no glob meaning
                 are escaped through ``regexbytesescapemap``.
                 >>> from . import pycompat
                 >>> def bprint(s):
                 ...     print(pycompat.sysstr(s))
                 >>> bprint(_globre(br'?'))
                 .
                 >>> bprint(_globre(br'*'))
                 [^/]*
                 >>> bprint(_globre(br'**'))
                 .*
                 >>> bprint(_globre(br'**/a'))
                 (?:.*/)?a
                 >>> bprint(_globre(br'a/**/b'))
                 a/(?:.*/)?b
                 >>> bprint(_globre(br'[a*?!^][^b][!c]'))
                 [a*?!^][\^b][^c]
                 >>> bprint(_globre(br'{a,b}'))
                 (?:a|b)
                 >>> bprint(_globre(br'.\*\?'))
                 \.\*\?
                 '''
                 # i is the index of the next unread byte, n the length of the pattern.
                 i, n = 0, len(pat)
                 res = b''
                 # Number of '{' groups currently open.
                 group = 0
                 escape = util.stringutil.regexbytesescapemap.get
                 def peek():
                     # Next unread byte without consuming it; False at the end of pat.
                     return i < n and pat[i : i + 1]
                 while i < n:
                     c = pat[i : i + 1]
                     i += 1
                     if c not in b'*?[{},\\':
                         # Not a glob metacharacter: emit it, escaped if the map says so.
                         res += escape(c, c)
                     elif c == b'*':
                         if peek() == b'*':
                             i += 1
                             if peek() == b'/':
                                 # '**/' also consumes the slash.
                                 i += 1
                                 res += b'(?:.*/)?'
                             else:
                                 res += b'.*'
                         else:
                             # A single '*' does not match across '/'.
                             res += b'[^/]*'
                     elif c == b'?':
                         res += b'.'
                     elif c == b'[':
                         # Look for the closing ']' starting at j. A '!' or ']' right
                         # after '[' is skipped so it is not taken as the terminator.
                         j = i
                         if j < n and pat[j : j + 1] in b'!]':
                             j += 1
                         while j < n and pat[j : j + 1] != b']':
                             j += 1
                         if j >= n:
                             # No closing bracket: treat '[' as a literal.
                             res += b'\\['
                         else:
                             # Double the backslashes inside the character class.
                             stuff = pat[i:j].replace(b'\\', b'\\\\')
                             i = j + 1
                             if stuff[0:1] == b'!':
                                 # Glob negation '!' becomes regexp negation '^'.
                                 stuff = b'^' + stuff[1:]
                             elif stuff[0:1] == b'^':
                                 # A leading '^' is a literal in the glob, so escape it.
                                 stuff = b'\\' + stuff
                             res = b'%s[%s]' % (res, stuff)
                     elif c == b'{':
                         group += 1
                         res += b'(?:'
                     elif c == b'}' and group:
                         res += b')'
                         group -= 1
                     elif c == b',' and group:
                         # Inside a group, ',' separates alternatives.
                         res += b'|'
                     elif c == b'\\':
                         p = peek()
                         if p:
                             # Backslash makes the following byte a literal.
                             i += 1
                             res += escape(p, p)
                         else:
                             # Trailing backslash: emit the backslash itself.
                             res += escape(c, c)
                     else:
                         # '}' or ',' outside of any group: plain literal.
                         res += escape(c, c)
                 return res
             def _regex(kind, pat, globsuffix):
                 '''Translate one normalized pattern of the given kind into a regexp.
                 ``globsuffix`` is appended to the regexps produced for the glob-style
                 kinds (glob, rootglob and relglob).
                 Raises error.ProgrammingError when the kind cannot be turned into a
                 regexp.'''
                 if rustmod is not None:
                     # The Rust implementation, when available, does all of the work.
                     try:
                         return rustmod.build_single_regex(kind, pat, globsuffix)
                     except rustmod.PatternError:
                         raise error.ProgrammingError(
                             b'not a regex pattern: %s:%s' % (kind, pat)
                         )
                 if kind in (b'glob', b'relpath') and not pat:
                     return b''
                 if kind == b're':
                     return pat
                 if kind == b'path' or kind == b'relpath':
                     if pat == b'.':
                         return b''
                     # The path itself, or anything below it.
                     return util.stringutil.reescape(pat) + b'(?:/|$)'
                 if kind == b'rootfilesin':
                     # Pattern is a directory name; b'.' contributes no directory part.
                     if pat == b'.':
                         dirpart = b''
                     else:
                         dirpart = util.stringutil.reescape(pat) + b'/'
                     # Anything after the pattern must be a non-directory.
                     return dirpart + b'[^/]+$'
                 if kind == b'relglob':
                     anyname = b'[^/]*'
                     translated = _globre(pat)
                     if translated.startswith(anyname):
                         # When pat has the form *XYZ (common), make the returned regex
                         # more legible by returning the regex for **XYZ instead of
                         # **/*XYZ.
                         return b'.*' + translated[len(anyname) :] + globsuffix
                     return b'(?:|.*/)' + translated + globsuffix
                 if kind == b'relre':
                     # An anchored pattern is used as is.
                     return pat if pat.startswith(b'^') else b'.*' + pat
                 if kind == b'glob' or kind == b'rootglob':
                     return _globre(pat) + globsuffix
                 raise error.ProgrammingError(b'not a regex pattern: %s:%s' % (kind, pat))
             def _buildmatch(kindpats, globsuffix, root):
                 '''Build the regexp string and the match function for kindpats.
                 Returns a ``(regex, matchfn)`` pair. ``globsuffix`` is appended to the
                 regexp of globs.'''
                 matchfuncs = []
                 subincludes, kindpats = _expandsubinclude(kindpats, root)
                 if subincludes:
                     submatchers = {}
                     def matchsubinclude(f):
                         for prefix, matcherargs in subincludes:
                             if not f.startswith(prefix):
                                 continue
                             submatcher = submatchers.get(prefix)
                             if submatcher is None:
                                 # Created on first use and cached per prefix.
                                 submatcher = submatchers[prefix] = match(*matcherargs)
                             if submatcher(f[len(prefix) :]):
                                 return True
                         return False
                     matchfuncs.append(matchsubinclude)
                 regex = b''
                 if kindpats:
                     if all(k == b'rootfilesin' for k, p, s in kindpats):
                         # Only rootfilesin patterns: compare the file's directory with
                         # the requested ones instead of building a regexp.
                         dirs = {p for k, p, s in kindpats}
                         def matchrootfilesin(f):
                             head, slash, _name = f.rpartition(b'/')
                             # A file with no slash in its name is reported under b'.'.
                             return (head if slash else b'.') in dirs
                         regex = b'rootfilesin: %s' % stringutil.pprint(sorted(dirs))
                         matchfuncs.append(matchrootfilesin)
                     else:
                         regex, regexmatch = _buildregexmatch(kindpats, globsuffix)
                         matchfuncs.append(regexmatch)
                 if len(matchfuncs) != 1:
                     return regex, lambda f: any(mf(f) for mf in matchfuncs)
                 return regex, matchfuncs[0]
             # Size limit, in bytes, used by _buildregexmatch: a single pattern regexp
             # longer than this aborts, and joined regexps are split into several groups
             # so that a group stays within it.
             MAX_RE_SIZE = 20000
             def _joinregexes(regexps):
                 """Combine several regular expressions into one alternation.
                 The pieces are joined with b'|'; no grouping is added around them."""
                 separator = b'|'
                 return separator.join(regexps)
             def _buildregexmatch(kindpats, globsuffix):
                 """Turn a list of (kind, pattern, source) into a regexp matcher.

                 Returns a ``(regexp, func)`` pair. ``regexp`` is the regexp of every
                 pattern joined into one alternation; ``func(s)`` is true when one of
                 the compiled regexps accepts ``s``.

                 Raises error.Abort when the regexp of a single pattern is longer
                 than MAX_RE_SIZE, or when a pattern cannot be compiled.

                 Test too large input
                 >>> _buildregexmatch([
                 ...     (b'relglob', b'?' * MAX_RE_SIZE, b'')
                 ... ], b'$')
                 Traceback (most recent call last):
                 ...
                 Abort: matcher pattern is too long (20009 bytes)
                 """
                 try:
                     pieces = [
                         _regex(kind, pat, globsuffix) for kind, pat, src in kindpats
                     ]
                     combined = _joinregexes(pieces)

                     # Pack consecutive pieces into groups whose joined size stays
                     # within MAX_RE_SIZE: a group is closed as soon as the next
                     # piece would make it overflow.
                     closedgroups = []
                     groupstart = 0
                     grouplen = 0
                     for pos, piece in enumerate(pieces):
                         size = len(piece)
                         if size > MAX_RE_SIZE:
                             raise error.Abort(
                                 _(b"matcher pattern is too long (%d bytes)") % size
                             )
                         if grouplen + size > MAX_RE_SIZE:
                             closedgroups.append(
                                 _joinregexes(pieces[groupstart:pos])
                             )
                             groupstart = pos
                             grouplen = 0
                         # the extra byte stands for the b'|' added when joining
                         grouplen += size + 1

                     if groupstart != 0:
                         # At least one split happened: close the trailing group
                         # and compile one matcher per group.
                         closedgroups.append(_joinregexes(pieces[groupstart:]))
                         groupmatchers = [_rematcher(g) for g in closedgroups]

                         def func(s):
                             return any(m(s) for m in groupmatchers)

                     else:
                         # Everything fits in a single regexp.
                         singlematcher = _rematcher(combined)

                         def func(s):
                             return bool(singlematcher(s))

                     return combined, func
                 except re.error:
                     # Compile the patterns one at a time to name the faulty one.
                     for kind, pat, src in kindpats:
                         try:
                             _rematcher(_regex(kind, pat, globsuffix))
                         except re.error:
                             if not src:
                                 raise error.Abort(
                                     _(b"invalid pattern (%s): %s") % (kind, pat)
                                 )
                             raise error.Abort(
                                 _(b"%s: invalid pattern (%s): %s") % (src, kind, pat)
                             )
                     raise error.Abort(_(b"invalid pattern"))
             def _patternrootsanddirs(kindpats):
                 '''Returns roots and directories corresponding to each pattern.
                 This calculates the roots and directories exactly matching the patterns and
                 returns a tuple of (roots, dirs) for each. It does not return other
                 directories which may also need to be considered, like the parent
                 directories.
                 '''
                 r = []
                 d = []
                 for kind, pat, source in kindpats:
                     if kind in (b'glob', b'rootglob'):  # find the non-glob prefix
                         root = []
                         for p in pat.split(b'/'):
                             if b'[' in p or b'{' in p or b'*' in p or b'?' in p:
                                 break
                             root.append(p)
                         r.append(b'/'.join(root))
                     elif kind in (b'relpath', b'path'):
                         if pat == b'.':
                             pat = b''
                         r.append(pat)
                     elif kind in (b'rootfilesin',):
                         if pat == b'.':
                             pat = b''
                         d.append(pat)
                     else:  # relglob, re, relre
                         r.append(b'')
                 return r, d
             def _roots(kindpats):
                 '''Return the root directories the given patterns match recursively.

                 This is the first element of what _patternrootsanddirs() computes.
                 '''
                 return _patternrootsanddirs(kindpats)[0]
             def _rootsdirsandparents(kindpats):
                 '''Return the roots, exact directories and parents of the patterns.

                 ``roots`` are the directories to match recursively, ``dirs`` the
                 ones to match non-recursively, and ``parents`` the directories that
                 implicitly have to be walked to reach an item of ``roots`` or
                 ``dirs``.

                 Returns a tuple of (roots, dirs, parents).

                 >>> r = _rootsdirsandparents(
                 ...     [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
                 ...      (b'glob', b'g*', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 (['g/h', 'g/h', ''], []) ['', 'g']
                 >>> r = _rootsdirsandparents(
                 ...     [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 ([], ['g/h', '']) ['', 'g']
                 >>> r = _rootsdirsandparents(
                 ...     [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
                 ...      (b'path', b'', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 (['r', 'p/p', ''], []) ['', 'p']
                 >>> r = _rootsdirsandparents(
                 ...     [(b'relglob', b'rg*', b''), (b're', b're/', b''),
                 ...      (b'relre', b'rr', b'')])
                 >>> print(r[0:2], sorted(r[2])) # the set has an unstable output
                 (['', '', ''], []) ['']
                 '''
                 roots, exactdirs = _patternrootsanddirs(kindpats)

                 # The parents are recorded as non-recursive/exact directories: they
                 # have to be scanned to reach either the roots or the other exact
                 # directories.
                 parents = set()
                 for locations in (exactdirs, roots):
                     parents.update(pathutil.dirs(locations))

                 # FIXME: all uses of this function convert these to sets, do so before
                 # returning.
                 # FIXME: all uses of this function do not need anything in 'roots' and
                 # 'dirs' to also be in 'parents', consider removing them before returning.
                 return roots, exactdirs, parents
             def _explicitfiles(kindpats):
                 '''Return the filenames that the patterns may designate explicitly.

                 >>> _explicitfiles([(b'path', b'foo/bar', b'')])
                 ['foo/bar']
                 >>> _explicitfiles([(b'rootfilesin', b'foo/bar', b'')])
                 []
                 '''
                 # rootfilesin can only name directories, never files, so those
                 # patterns are discarded before the roots are extracted.
                 candidates = [
                     entry for entry in kindpats if entry[0] != b'rootfilesin'
                 ]
                 return _roots(candidates)
             def _prefix(kindpats):
                 '''Whether all the patterns match a prefix (i.e. recursively)'''
                 for kind, pat, source in kindpats:
                     if kind not in (b'path', b'relpath'):
                         return False
                 return True
             # Cache for the comment-stripping regexp of readpatternfile(); it is
             # compiled there on first use and stays None until then.
             _commentre = None
             def readpatternfile(filepath, warn, sourceinfo=False):
                 '''parse a pattern file, returning a list of
                 patterns. These patterns should be given to compile()
                 to be validated and converted into a match function.

                 trailing white space is dropped.
                 the escape character is backslash.
                 comments start with #.
                 empty lines are skipped.

                 lines can be of the following formats:

                 syntax: regexp # defaults following lines to non-rooted regexps
                 syntax: glob   # defaults following lines to non-rooted globs
                 re:pattern     # non-rooted regular expression
                 glob:pattern   # non-rooted glob
                 rootglob:pat   # rooted glob (same root as ^ in regexps)
                 pattern        # pattern of the current default type

                 if sourceinfo is set, returns a list of tuples:
                 (pattern, lineno, originalline).
                 This is useful to debug ignore patterns.

                 warn, when true, is called with a message for every ``syntax:``
                 line naming an unknown syntax; such lines are otherwise ignored.

                 When the Rust implementation is available the parsing is delegated
                 to it and only the warnings are emitted from Python.
                 '''
                 # _commentre is a module-level cache, compiled on first use below.
                 global _commentre

                 if rustmod is not None:
                     result, warnings = rustmod.read_pattern_file(
                         filepath, bool(warn), sourceinfo,
                     )
                     for warning_params in warnings:
                         # Can't be easily emitted from Rust, because it would require
                         # a mechanism for both gettext and calling the `warn` function.
                         warn(_(b"%s: ignoring invalid syntax '%s'\n") % warning_params)
                     return result

                 # Maps a syntax name to the prefix prepended to the stored pattern.
                 syntaxes = {
                     b're': b'relre:',
                     b'regexp': b'relre:',
                     b'glob': b'relglob:',
                     b'rootglob': b'rootglob:',
                     b'include': b'include',
                     b'subinclude': b'subinclude',
                 }
                 # default syntax until a "syntax:" line says otherwise
                 syntax = b'relre:'
                 patterns = []

                 # The context manager guarantees the file is closed even when the
                 # parsing loop (or the warn callback) raises.
                 with open(filepath, b'rb') as fp:
                     for lineno, line in enumerate(util.iterfile(fp), start=1):
                         if b"#" in line:
                             if not _commentre:
                                 _commentre = util.re.compile(
                                     br'((?:^|[^\\])(?:\\\\)*)#.*'
                                 )
                             # remove comments prefixed by an even number of escapes
                             m = _commentre.search(line)
                             if m:
                                 line = line[: m.end(1)]
                             # fixup properly escaped comments that survived the above
                             line = line.replace(b"\\#", b"#")
                         line = line.rstrip()
                         if not line:
                             continue

                         if line.startswith(b'syntax:'):
                             # switch the default syntax for the following lines
                             s = line[7:].strip()
                             try:
                                 syntax = syntaxes[s]
                             except KeyError:
                                 if warn:
                                     warn(
                                         _(b"%s: ignoring invalid syntax '%s'\n")
                                         % (filepath, s)
                                     )
                             continue

                         # A line may override the default syntax with its own
                         # prefix, spelled either as the stored prefix or as
                         # "<syntax name>:".
                         linesyntax = syntax
                         for s, rels in pycompat.iteritems(syntaxes):
                             if line.startswith(rels):
                                 linesyntax = rels
                                 line = line[len(rels) :]
                                 break
                             elif line.startswith(s + b':'):
                                 linesyntax = rels
                                 line = line[len(s) + 1 :]
                                 break

                         if sourceinfo:
                             patterns.append((linesyntax + line, lineno, line))
                         else:
                             patterns.append(linesyntax + line)
                 return patterns