upstream/mercurial-mirror Commit - r50770:cd3b8fd1

1

# testparseutil.py - utilities to parse test script for check tools

1

# testparseutil.py - utilities to parse test script for check tools

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

9

import abc

9

import abc

10

import builtins

10

import re

11

import re

11

import sys

12

13

####################

13

####################

14

# for Python3 compatibility (almost comes from mercurial/pycompat.py)

14

# for Python3 compatibility (almost comes from mercurial/pycompat.py)

15

16

ispy3 = sys.version_info[0] >= 3

17

18

16

19

def identity(a):
    """Return ``a`` unchanged (used as a no-op conversion function)."""
    return a

21

19

22

20

23

def _rapply(f, xs):

21

def _rapply(f, xs):

24

if xs is None:

22

if xs is None:

25

# assume None means non-value of optional data

23

# assume None means non-value of optional data

26

return xs

24

return xs

27

if isinstance(xs, (list, set, tuple)):

25

if isinstance(xs, (list, set, tuple)):

28

return type(xs)(_rapply(f, x) for x in xs)

26

return type(xs)(_rapply(f, x) for x in xs)

29

if isinstance(xs, dict):

27

if isinstance(xs, dict):

30

return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())

28

return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())

31

return f(xs)

29

return f(xs)

32

30

33

31

34

def rapply(f, xs):
    """Apply ``f`` recursively to ``xs``, skipping the walk for ``identity``.

    When ``f`` is the module's ``identity`` function, ``xs`` is returned
    as-is without being traversed or copied.
    """
    if f is identity:
        # fast path: nothing would change, so avoid rebuilding containers
        return xs
    return _rapply(f, xs)

39

37

40

38

41

def bytestr(s):
    """Encode the str ``s`` to bytes using latin-1.

    This is a tiny version of pycompat.bytestr.
    """
    return s.encode('latin1')


def sysstr(s):
    """Return ``s`` as a native str, decoding it as latin-1 if needed."""
    if isinstance(s, builtins.str):
        return s
    return s.decode('latin-1')


def opentext(f):
    """Open the file named ``f`` for reading in text mode."""
    return open(f, 'r')

62

52

63

53

64

def b2s(x):
    """Convert BYTES elements in ``x`` to SYSSTR recursively."""
    return rapply(sysstr, x)

67

57

68

58

69

def writeout(data):
    """Write ``data`` to stdout through ``sys.stdout.write``."""
    # NOTE(review): this was historically described as writing "BYTES";
    # a text-mode sys.stdout only accepts str -- confirm against callers.
    sys.stdout.write(data)

72

62

73

63

74

def writeerr(data):
    """Write ``data`` to stderr through ``sys.stderr.write``."""
    # NOTE(review): this was historically described as writing "BYTES";
    # a text-mode sys.stderr only accepts str -- confirm against callers.
    sys.stderr.write(data)

77

67

78

68

79

####################

69

####################

80

70

81

71

82

class embeddedmatcher(abc.ABC):
    """Base class to detect embedded code fragments in *.t test script"""

    # Deriving from abc.ABC (rather than assigning "__metaclass__", which
    # Python 3 ignores) makes the abstract methods below actually
    # enforced: a subclass must override all of them to be instantiable.

    def __init__(self, desc):
        # human readable description, used in error messages
        self.desc = desc

    @abc.abstractmethod
    def startsat(self, line):
        """Examine whether embedded code starts at line

        This can return arbitrary object, and it is used as 'ctx' for
        subsequent method invocations.
        """

    @abc.abstractmethod
    def endsat(self, ctx, line):
        """Examine whether embedded code ends at line"""

    @abc.abstractmethod
    def isinside(self, ctx, line):
        """Examine whether line is inside embedded code, if not yet endsat"""

    @abc.abstractmethod
    def ignores(self, ctx):
        """Examine whether detected embedded code should be ignored"""

    @abc.abstractmethod
    def filename(self, ctx):
        """Return filename of embedded code

        If filename isn't specified for embedded code explicitly, this
        returns None.
        """

    @abc.abstractmethod
    def codeatstart(self, ctx, line):
        """Return actual code at the start line of embedded code

        This might return None, if the start line doesn't contain
        actual code.
        """

    @abc.abstractmethod
    def codeatend(self, ctx, line):
        """Return actual code at the end line of embedded code

        This might return None, if the end line doesn't contain actual
        code.
        """

    @abc.abstractmethod
    def codeinside(self, ctx, line):
        """Return actual code at line inside embedded code"""

137

127

138

128

139

def embedded(basefile, lines, errors, matchers):
    """pick embedded code fragments up from given lines

    This is common parsing logic, which examines specified matchers on
    given lines.

    :basefile: a name of a file, from which lines to be parsed come.
    :lines: to be parsed (might be a value returned by "open(basefile)")
    :errors: an array, into which messages for detected error are stored
    :matchers: an array of embeddedmatcher objects

    This function yields '(filename, starts, ends, code)' tuple.

    :filename: a name of embedded code, if it is explicitly specified
               (e.g.  "foobar" of "cat >> foobar <<EOF").
               Otherwise, this is None
    :starts: line number (1-origin), at which embedded code starts (inclusive)
    :ends: line number (1-origin), at which embedded code ends (exclusive)
    :code: extracted embedded code, which is single-stringified

    >>> class ambigmatcher:
    ...     # mock matcher class to examine implementation of
    ...     # "ambiguous matching" corner case
    ...     def __init__(self, desc, matchfunc):
    ...         self.desc = desc
    ...         self.matchfunc = matchfunc
    ...     def startsat(self, line):
    ...         return self.matchfunc(line)
    >>> ambig1 = ambigmatcher('ambiguous #1',
    ...                       lambda l: l.startswith('  $ cat '))
    >>> ambig2 = ambigmatcher('ambiguous #2',
    ...                       lambda l: l.endswith('<< EOF\\n'))
    >>> lines = ['  $ cat > foo.py << EOF\\n']
    >>> errors = []
    >>> matchers = [ambig1, ambig2]
    >>> list(t for t in embedded('<dummy>', lines, errors, matchers))
    []
    >>> b2s(errors)
    ['<dummy>:1: ambiguous line for "ambiguous #1", "ambiguous #2"']

    """
    matcher = None
    ctx = filename = code = startline = None  # for pyflakes

    for lineno, line in enumerate(lines, 1):
        if not line.endswith('\n'):
            line += '\n'  # to normalize EOF line
        if matcher:  # now, inside embedded code
            if matcher.endsat(ctx, line):
                codeatend = matcher.codeatend(ctx, line)
                if codeatend is not None:
                    code.append(codeatend)
                if not matcher.ignores(ctx):
                    yield (filename, startline, lineno, ''.join(code))
                matcher = None
                # DO NOT "continue", because line might start next fragment
            elif not matcher.isinside(ctx, line):
                # this is an error of basefile
                # (if matchers are implemented correctly)
                errors.append(
                    '%s:%d: unexpected line for "%s"'
                    % (basefile, lineno, matcher.desc)
                )
                # stop extracting embedded code by current 'matcher',
                # because appearance of unexpected line might mean
                # that expected end-of-embedded-code line might never
                # appear
                matcher = None
                # DO NOT "continue", because line might start next fragment
            else:
                code.append(matcher.codeinside(ctx, line))
                continue

        # examine whether current line starts embedded code or not
        assert not matcher

        matched = []
        for m in matchers:
            ctx = m.startsat(line)
            if ctx:
                matched.append((m, ctx))
        if matched:
            if len(matched) > 1:
                # this is an error of matchers, maybe
                errors.append(
                    '%s:%d: ambiguous line for %s'
                    % (
                        basefile,
                        lineno,
                        ', '.join(['"%s"' % m.desc for m, c in matched]),
                    )
                )
                # omit extracting embedded code, because choosing
                # arbitrary matcher from matched ones might fail to
                # detect the end of embedded code as expected.
                continue
            matcher, ctx = matched[0]
            filename = matcher.filename(ctx)
            code = []
            codeatstart = matcher.codeatstart(ctx, line)
            if codeatstart is not None:
                code.append(codeatstart)
                startline = lineno
            else:
                # the start line carries no code, so code begins on the
                # following line
                startline = lineno + 1

    if matcher:
        # examine whether EOF ends embedded code, because embedded
        # code isn't yet ended explicitly
        if matcher.endsat(ctx, '\n'):
            codeatend = matcher.codeatend(ctx, '\n')
            if codeatend is not None:
                code.append(codeatend)
            if not matcher.ignores(ctx):
                yield (filename, startline, lineno + 1, ''.join(code))
        else:
            # this is an error of basefile
            # (if matchers are implemented correctly)
            errors.append(
                '%s:%d: unexpected end of file for "%s"'
                % (basefile, lineno, matcher.desc)
            )

261

251

262

252

263

# heredoc limit mark to ignore embedded code at check-code.py or so
heredocignorelimit = 'NO_CHECK_EOF'

# the pattern to match against cases below, and to return a limit mark
# string as 'limit' group
#
# - << LIMITMARK
# - << "LIMITMARK"
# - << 'LIMITMARK'
heredoclimitpat = r'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'

273

263

274

264

275

class fileheredocmatcher(embeddedmatcher):
    """Detect "cat > FILE << LIMIT" style embedded code

    >>> matcher = fileheredocmatcher('heredoc .py file', r'[^<]+\\.py')
    >>> b2s(matcher.startsat('  $ cat > file.py << EOF\\n'))
    ('file.py', '  > EOF\\n')
    >>> b2s(matcher.startsat('  $ cat >>file.py <<EOF\\n'))
    ('file.py', '  > EOF\\n')
    >>> b2s(matcher.startsat('  $ cat> \\x27any file.py\\x27<< "EOF"\\n'))
    ('any file.py', '  > EOF\\n')
    >>> b2s(matcher.startsat("  $ cat > file.py << 'ANYLIMIT'\\n"))
    ('file.py', '  > ANYLIMIT\\n')
    >>> b2s(matcher.startsat('  $ cat<<ANYLIMIT>"file.py"\\n'))
    ('file.py', '  > ANYLIMIT\\n')
    >>> start = '  $ cat > file.py << EOF\\n'
    >>> ctx = matcher.startsat(start)
    >>> matcher.codeatstart(ctx, start)
    >>> b2s(matcher.filename(ctx))
    'file.py'
    >>> matcher.ignores(ctx)
    False
    >>> inside = '  > foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> end = '  > EOF\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> matcher.endsat(ctx, '  > EOFEOF\\n')
    False
    >>> ctx = matcher.startsat('  $ cat > file.py << NO_CHECK_EOF\\n')
    >>> matcher.ignores(ctx)
    True
    """

    # continuation-line prefix of a heredoc body in *.t files; two leading
    # spaces, consistent with the ' {2}\$ ' command-line patterns below
    _prefix = '  > '

    def __init__(self, desc, namepat):
        super().__init__(desc)

        # build the pattern to match against cases below (and ">>"
        # variants), and to return a target filename string as 'name'
        # group
        #
        # - > NAMEPAT
        # - > "NAMEPAT"
        # - > 'NAMEPAT'
        namepat = (
            r'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)' % namepat
        )
        self._fileres = [
            # "cat > NAME << LIMIT" case
            re.compile(r' {2}\$ \s*cat' + namepat + heredoclimitpat),
            # "cat << LIMIT > NAME" case
            re.compile(r' {2}\$ \s*cat' + heredoclimitpat + namepat),
        ]

    def startsat(self, line):
        # ctx is (filename, END-LINE-OF-EMBEDDED-CODE) tuple
        for filere in self._fileres:
            matched = filere.match(line)
            if matched:
                return (
                    matched.group('name'),
                    '  > %s\n' % matched.group('limit'),
                )
        return None  # line does not start a file heredoc

    def endsat(self, ctx, line):
        return ctx[1] == line

    def isinside(self, ctx, line):
        return line.startswith(self._prefix)

    def ignores(self, ctx):
        return '  > %s\n' % heredocignorelimit == ctx[1]

    def filename(self, ctx):
        return ctx[0]

    def codeatstart(self, ctx, line):
        return None  # no embedded code at start line

    def codeatend(self, ctx, line):
        return None  # no embedded code at end line

    def codeinside(self, ctx, line):
        return line[len(self._prefix) :]  # strip prefix

366

356

367

357

368

####

358

####

369

# for embedded python script

359

# for embedded python script

370

360

371

361

372

class pydoctestmatcher(embeddedmatcher):
    """Detect ">>> code" style embedded python code

    >>> matcher = pydoctestmatcher()
    >>> startline = '  >>> foo = 1\\n'
    >>> matcher.startsat(startline)
    True
    >>> matcher.startsat('  ... foo = 1\\n')
    False
    >>> ctx = matcher.startsat(startline)
    >>> matcher.filename(ctx)
    >>> matcher.ignores(ctx)
    False
    >>> b2s(matcher.codeatstart(ctx, startline))
    'foo = 1\\n'
    >>> inside = '  >>> foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> inside = '  ... foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> inside = '  expected output\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    '\\n'
    >>> inside = '  \\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    '\\n'
    >>> end = '  $ foo bar\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> end = '\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    """

    # '  >>> ' and '  ... ' have the same length, so len(_prefix) strips
    # either prefix matched by _prefixre
    _prefix = '  >>> '
    _prefixre = re.compile(r' {2}(>>>|\.\.\.) ')

    # If a line matches against not _prefixre but _outputre, that line
    # is "an expected output line" (= not a part of code fragment).
    #
    # Strictly speaking, a line matching against "(#if|#else|#endif)"
    # is also treated similarly in "inline python code" semantics by
    # run-tests.py. But "directive line inside inline python code"
    # should be rejected by Mercurial reviewers. Therefore, this
    # regexp does not match against such directive lines.
    _outputre = re.compile(r' {2}$| {2}[^$]')

    def __init__(self):
        super().__init__("doctest style python code")

    def startsat(self, line):
        # ctx is "True"
        return line.startswith(self._prefix)

    def endsat(self, ctx, line):
        return not (self._prefixre.match(line) or self._outputre.match(line))

    def isinside(self, ctx, line):
        return True  # always true, if not yet ended

    def ignores(self, ctx):
        return False  # should be checked always

    def filename(self, ctx):
        return None  # no filename

    def codeatstart(self, ctx, line):
        return line[len(self._prefix) :]  # strip prefix '  >>> '/'  ... '

    def codeatend(self, ctx, line):
        return None  # no embedded code at end line

    def codeinside(self, ctx, line):
        if self._prefixre.match(line):
            return line[len(self._prefix) :]  # strip prefix '  >>> '/'  ... '
        return '\n'  # an expected output line is treated as an empty line

467

457

468

458

469

class pyheredocmatcher(embeddedmatcher):
    """Detect "python << LIMIT" style embedded python code

    >>> matcher = pyheredocmatcher()
    >>> b2s(matcher.startsat('  $ python << EOF\\n'))
    '  > EOF\\n'
    >>> b2s(matcher.startsat('  $ $PYTHON <<EOF\\n'))
    '  > EOF\\n'
    >>> b2s(matcher.startsat('  $ "$PYTHON"<< "EOF"\\n'))
    '  > EOF\\n'
    >>> b2s(matcher.startsat("  $ $PYTHON << 'ANYLIMIT'\\n"))
    '  > ANYLIMIT\\n'
    >>> matcher.startsat('  $ "$PYTHON" < EOF\\n')
    >>> start = '  $ python << EOF\\n'
    >>> ctx = matcher.startsat(start)
    >>> matcher.codeatstart(ctx, start)
    >>> matcher.filename(ctx)
    >>> matcher.ignores(ctx)
    False
    >>> inside = '  > foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> end = '  > EOF\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> matcher.endsat(ctx, '  > EOFEOF\\n')
    False
    >>> ctx = matcher.startsat('  $ python << NO_CHECK_EOF\\n')
    >>> matcher.ignores(ctx)
    True
    """

    # continuation-line prefix of a heredoc body in *.t files; two leading
    # spaces, consistent with the ' {2}\$ ' pattern of _startre
    _prefix = '  > '

    _startre = re.compile(
        r' {2}\$ (\$PYTHON|"\$PYTHON"|python).*' + heredoclimitpat
    )

    def __init__(self):
        super().__init__("heredoc python invocation")

    def startsat(self, line):
        # ctx is END-LINE-OF-EMBEDDED-CODE
        matched = self._startre.match(line)
        if matched:
            return '  > %s\n' % matched.group('limit')
        return None  # line does not start a python heredoc

    def endsat(self, ctx, line):
        return ctx == line

    def isinside(self, ctx, line):
        return line.startswith(self._prefix)

    def ignores(self, ctx):
        return '  > %s\n' % heredocignorelimit == ctx

    def filename(self, ctx):
        return None  # no filename

    def codeatstart(self, ctx, line):
        return None  # no embedded code at start line

    def codeatend(self, ctx, line):
        return None  # no embedded code at end line

    def codeinside(self, ctx, line):
        return line[len(self._prefix) :]  # strip prefix

541

531

542

532

543

_pymatchers = [

533

_pymatchers = [

544

pydoctestmatcher(),

534

pydoctestmatcher(),

545

pyheredocmatcher(),

535

pyheredocmatcher(),

546

# use '[^<]+' instead of '\S+', in order to match against

536

# use '[^<]+' instead of '\S+', in order to match against

547

# paths including whitespaces

537

# paths including whitespaces

548

fileheredocmatcher('heredoc .py file', r'[^<]+\.py'),

538

fileheredocmatcher('heredoc .py file', r'[^<]+\.py'),

549

]

539

]

550

540

551

541

552

def pyembedded(basefile, lines, errors):

542

def pyembedded(basefile, lines, errors):

553

return embedded(basefile, lines, errors, _pymatchers)

543

return embedded(basefile, lines, errors, _pymatchers)

554

544

555

545

556

####

546

####

557

# for embedded shell script

547

# for embedded shell script

558

548

559

_shmatchers = [

549

_shmatchers = [

560

# use '[^<]+' instead of '\S+', in order to match against

550

# use '[^<]+' instead of '\S+', in order to match against

561

# paths including whitespaces

551

# paths including whitespaces

562

fileheredocmatcher('heredoc .sh file', r'[^<]+\.sh'),

552

fileheredocmatcher('heredoc .sh file', r'[^<]+\.sh'),

563

]

553

]

564

554

565

555

566

def shembedded(basefile, lines, errors):

556

def shembedded(basefile, lines, errors):

567

return embedded(basefile, lines, errors, _shmatchers)

557

return embedded(basefile, lines, errors, _shmatchers)

568

558

569

559

570

####

560

####

571

# for embedded hgrc configuration

561

# for embedded hgrc configuration

572

562

573

_hgrcmatchers = [

563

_hgrcmatchers = [

574

# use '[^<]+' instead of '\S+', in order to match against

564

# use '[^<]+' instead of '\S+', in order to match against

575

# paths including whitespaces

565

# paths including whitespaces

576

fileheredocmatcher(

566

fileheredocmatcher(

577

'heredoc hgrc file', r'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'

567

'heredoc hgrc file', r'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'

578

),

568

),

579

]

569

]

580

570

581

571

582

def hgrcembedded(basefile, lines, errors):

572

def hgrcembedded(basefile, lines, errors):

583

return embedded(basefile, lines, errors, _hgrcmatchers)

573

return embedded(basefile, lines, errors, _hgrcmatchers)

584

574

585

575

586

####

576

####

587

577

588

if __name__ == "__main__":

578

if __name__ == "__main__":

589

import optparse

579

import optparse

590

import sys

580

import sys

591

581

592

def showembedded(basefile, lines, embeddedfunc, opts):

582

def showembedded(basefile, lines, embeddedfunc, opts):

593

errors = []

583

errors = []

594

for name, starts, ends, code in embeddedfunc(basefile, lines, errors):

584

for name, starts, ends, code in embeddedfunc(basefile, lines, errors):

595

if not name:

585

if not name:

596

name = '<anonymous>'

586

name = '<anonymous>'

597

writeout("%s:%d: %s starts\n" % (basefile, starts, name))

587

writeout("%s:%d: %s starts\n" % (basefile, starts, name))

598

if opts.verbose and code:

588

if opts.verbose and code:

599

writeout(" |%s\n" % "\n |".join(l for l in code.splitlines()))

589

writeout(" |%s\n" % "\n |".join(l for l in code.splitlines()))

600

writeout("%s:%d: %s ends\n" % (basefile, ends, name))

590

writeout("%s:%d: %s ends\n" % (basefile, ends, name))

601

for e in errors:

591

for e in errors:

602

writeerr("%s\n" % e)

592

writeerr("%s\n" % e)

603

return len(errors)

593

return len(errors)

604

594

605

def applyembedded(args, embeddedfunc, opts):

595

def applyembedded(args, embeddedfunc, opts):

606

ret = 0

596

ret = 0

607

if args:

597

if args:

608

for f in args:

598

for f in args:

609

with opentext(f) as fp:

599

with opentext(f) as fp:

610

if showembedded(f, fp, embeddedfunc, opts):

600

if showembedded(f, fp, embeddedfunc, opts):

611

ret = 1

601

ret = 1

612

else:

602

else:

613

lines = [l for l in sys.stdin.readlines()]

603

lines = [l for l in sys.stdin.readlines()]

614

if showembedded('<stdin>', lines, embeddedfunc, opts):

604

if showembedded('<stdin>', lines, embeddedfunc, opts):

615

ret = 1

605

ret = 1

616

return ret

606

return ret

617

607

618

commands = {}

608

commands = {}

619

609

620

def command(name, desc):

610

def command(name, desc):

621

def wrap(func):

611

def wrap(func):

622

commands[name] = (desc, func)

612

commands[name] = (desc, func)

623

613

624

return wrap

614

return wrap

625

615

626

@command("pyembedded", "detect embedded python script")

616

@command("pyembedded", "detect embedded python script")

627

def pyembeddedcmd(args, opts):

617

def pyembeddedcmd(args, opts):

628

return applyembedded(args, pyembedded, opts)

618

return applyembedded(args, pyembedded, opts)

629

619

630

@command("shembedded", "detect embedded shell script")

620

@command("shembedded", "detect embedded shell script")

631

def shembeddedcmd(args, opts):

621

def shembeddedcmd(args, opts):

632

return applyembedded(args, shembedded, opts)

622

return applyembedded(args, shembedded, opts)

633

623

634

@command("hgrcembedded", "detect embedded hgrc configuration")

624

@command("hgrcembedded", "detect embedded hgrc configuration")

635

def hgrcembeddedcmd(args, opts):

625

def hgrcembeddedcmd(args, opts):

636

return applyembedded(args, hgrcembedded, opts)

626

return applyembedded(args, hgrcembedded, opts)

637

627

638

availablecommands = "\n".join(

628

availablecommands = "\n".join(

639

[" - %s: %s" % (key, value[0]) for key, value in commands.items()]

629

[" - %s: %s" % (key, value[0]) for key, value in commands.items()]

640

)

630

)

641

631

642

parser = optparse.OptionParser(

632

parser = optparse.OptionParser(

643

"""%prog COMMAND [file ...]

633

"""%prog COMMAND [file ...]

644

634

645

Pick up embedded code fragments from given file(s) or stdin, and list

635

Pick up embedded code fragments from given file(s) or stdin, and list

646

up start/end lines of them in standard compiler format

636

up start/end lines of them in standard compiler format

647

("FILENAME:LINENO:").

637

("FILENAME:LINENO:").

648

638

649

Available commands are:

639

Available commands are:

650

"""

640

"""

651

+ availablecommands

641

+ availablecommands

652

+ """

642

+ """

653

"""

643

"""

654

)

644

)

655

parser.add_option(

645

parser.add_option(

656

"-v",

646

"-v",

657

"--verbose",

647

"--verbose",

658

help="enable additional output (e.g. actual code)",

648

help="enable additional output (e.g. actual code)",

659

action="store_true",

649

action="store_true",

660

)

650

)

661

(opts, args) = parser.parse_args()

651

(opts, args) = parser.parse_args()

662

652

663

if not args or args[0] not in commands:

653

if not args or args[0] not in commands:

664

parser.print_help()

654

parser.print_help()

665

sys.exit(255)

655

sys.exit(255)

666

656

667

sys.exit(commands[args[0]][1](args[1:], opts))

657

sys.exit(commands[args[0]][1](args[1:], opts))

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # testparseutil.py - utilities to parse test script for check tools
             #
             #  Copyright 2018 FUJIWARA Katsunori <foozy@lares.dti.ne.jp> and others
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             import abc
+            import builtins
             import re
-            import sys
             ####################
             # for Python3 compatibility (almost comes from mercurial/pycompat.py)
-            ispy3 = sys.version_info[0] >= 3
             def identity(a):
                 return a
             def _rapply(f, xs):
                 if xs is None:
                     # assume None means non-value of optional data
                     return xs
                 if isinstance(xs, (list, set, tuple)):
                     return type(xs)(_rapply(f, x) for x in xs)
                 if isinstance(xs, dict):
                     return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
                 return f(xs)
             def rapply(f, xs):
                 if f is identity:
                     # fast path mainly for py2
                     return xs
                 return _rapply(f, xs)
-            if ispy3:
+            def bytestr(s):
-                import builtins
+                # tiny version of pycompat.bytestr
+                return s.encode('latin1')
-                def bytestr(s):
-                    # tiny version of pycompat.bytestr
-                    return s.encode('latin1')
-                def sysstr(s):
-                    if isinstance(s, builtins.str):
-                        return s
-                    return s.decode('latin-1')
-                def opentext(f):
-                    return open(f, 'r')
-            else:
+            def sysstr(s):
-                bytestr = str
+                if isinstance(s, builtins.str):
-                sysstr = identity
+                    return s
+                return s.decode('latin-1')
-                opentext = open
+            def opentext(f):
+                return open(f, 'r')
             def b2s(x):
                 """Return "x" with each BYTES element converted to SYSSTR recursively"""
                 converted = rapply(sysstr, x)
                 return converted
             def writeout(data):
                 """Write "data" into stdout

                 "data" is passed to "sys.stdout.write" as it is.
                 """
                 # "sys" is imported only under the '__name__ == "__main__"' guard
                 # of this file, so import it locally to keep this helper usable
                 # even when this module is imported by another script
                 import sys

                 sys.stdout.write(data)
             def writeerr(data):
                 """Write "data" into stderr

                 "data" is passed to "sys.stderr.write" as it is.
                 """
                 # "sys" is imported only under the '__name__ == "__main__"' guard
                 # of this file, so import it locally to keep this helper usable
                 # even when this module is imported by another script
                 import sys

                 sys.stderr.write(data)
             ####################
             class embeddedmatcher:  # pytype: disable=ignored-metaclass
                 """Interface of detectors for code fragments embedded in *.t test script

                 A matcher is asked by "startsat" whether a fragment begins at a
                 line. The value returned there is handed back as 'ctx' to every
                 other method while subsequent lines are examined.
                 """

                 # NOTE: Python 3 does not honor the "__metaclass__" attribute
                 # (hence the pytype suppression above), so abstract methods
                 # below are not enforced at instantiation
                 __metaclass__ = abc.ABCMeta

                 def __init__(self, desc):
                     # human readable description of what this matcher detects
                     self.desc = desc

                 @abc.abstractmethod
                 def startsat(self, line):
                     """Tell whether embedded code starts at line

                     Any object can be returned. It becomes 'ctx' argument of
                     subsequent method invocations.
                     """

                 @abc.abstractmethod
                 def endsat(self, ctx, line):
                     """Tell whether embedded code ends at line"""

                 @abc.abstractmethod
                 def isinside(self, ctx, line):
                     """Tell whether line is inside embedded code, if not yet endsat"""

                 @abc.abstractmethod
                 def ignores(self, ctx):
                     """Tell whether detected embedded code should be ignored"""

                 @abc.abstractmethod
                 def filename(self, ctx):
                     """Return filename of embedded code

                     None is returned, if filename isn't specified for embedded
                     code explicitly.
                     """

                 @abc.abstractmethod
                 def codeatstart(self, ctx, line):
                     """Return actual code at the start line of embedded code

                     None might be returned, if the start line doesn't contain
                     actual code.
                     """

                 @abc.abstractmethod
                 def codeatend(self, ctx, line):
                     """Return actual code at the end line of embedded code

                     None might be returned, if the end line doesn't contain
                     actual code.
                     """

                 @abc.abstractmethod
                 def codeinside(self, ctx, line):
                     """Return actual code at line inside embedded code"""
             def embedded(basefile, lines, errors, matchers):
                 """Extract embedded code fragments from given lines

                 This is the parsing logic shared by all kinds of embedded
                 code: each line is examined against specified matchers.

                 :basefile: a name of a file, from which lines to be parsed come
                            (used in error messages)
                 :lines: to be parsed (might be a value returned by
                         "open(basefile)")
                 :errors: a list, to which a message is appended for each
                          detected error
                 :matchers: a list of embeddedmatcher objects

                 This function yields '(filename, starts, ends, code)' tuple.

                 :filename: a name of embedded code, if it is explicitly
                            specified (e.g. "foobar" of "cat >> foobar <<EOF").
                            Otherwise, this is None
                 :starts: line number (1-origin), at which embedded code starts
                          (inclusive)
                 :ends: line number (1-origin), at which embedded code ends
                        (exclusive)
                 :code: extracted embedded code, joined into a single string

                 >>> class ambigmatcher:
                 ...     # mock matcher class to examine implementation of
                 ...     # "ambiguous matching" corner case
                 ...     def __init__(self, desc, matchfunc):
                 ...         self.desc = desc
                 ...         self.matchfunc = matchfunc
                 ...     def startsat(self, line):
                 ...         return self.matchfunc(line)
                 >>> ambig1 = ambigmatcher('ambiguous #1',
                 ...                       lambda l: l.startswith('  $ cat '))
                 >>> ambig2 = ambigmatcher('ambiguous #2',
                 ...                       lambda l: l.endswith('<< EOF\\n'))
                 >>> lines = ['  $ cat > foo.py << EOF\\n']
                 >>> errors = []
                 >>> matchers = [ambig1, ambig2]
                 >>> list(t for t in embedded('<dummy>', lines, errors, matchers))
                 []
                 >>> b2s(errors)
                 ['<dummy>:1: ambiguous line for "ambiguous #1", "ambiguous #2"']
                 """
                 current = None  # matcher of the fragment being extracted, if any
                 ctx = filename = pieces = firstline = None  # for pyflakes

                 for lineno, line in enumerate(lines, 1):
                     if not line.endswith('\n'):
                         line += '\n'  # to normalize EOF line

                     if current:  # now, inside embedded code
                         ended = current.endsat(ctx, line)
                         if not ended and current.isinside(ctx, line):
                             pieces.append(current.codeinside(ctx, line))
                             continue

                         if ended:
                             tail = current.codeatend(ctx, line)
                             if tail is not None:
                                 pieces.append(tail)
                             if not current.ignores(ctx):
                                 yield (filename, firstline, lineno, ''.join(pieces))
                         else:
                             # this is an error of basefile
                             # (if matchers are implemented correctly)
                             errors.append(
                                 '%s:%d: unexpected line for "%s"'
                                 % (basefile, lineno, current.desc)
                             )

                         # Extraction by 'current' stops in both cases above.
                         # After an unexpected line, the expected end line
                         # might never appear. DO NOT "continue" here,
                         # because line might start next fragment
                         current = None

                     # examine whether current line starts embedded code or not
                     assert not current
                     candidates = []
                     for m in matchers:
                         startctx = m.startsat(line)
                         if startctx:
                             candidates.append((m, startctx))

                     if not candidates:
                         continue

                     if len(candidates) > 1:
                         # this is an error of matchers, maybe
                         descs = ', '.join(['"%s"' % m.desc for m, c in candidates])
                         errors.append(
                             '%s:%d: ambiguous line for %s' % (basefile, lineno, descs)
                         )
                         # omit extracting embedded code, because choosing
                         # arbitrary matcher from matched ones might fail to
                         # detect the end of embedded code as expected.
                         continue

                     current, ctx = candidates[0]
                     filename = current.filename(ctx)
                     head = current.codeatstart(ctx, line)
                     if head is None:
                         # code starts at the line following the start line
                         pieces = []
                         firstline = lineno + 1
                     else:
                         pieces = [head]
                         firstline = lineno

                 if not current:
                     return

                 # examine whether EOF ends embedded code, because embedded
                 # code isn't yet ended explicitly
                 if current.endsat(ctx, '\n'):
                     tail = current.codeatend(ctx, '\n')
                     if tail is not None:
                         pieces.append(tail)
                     if not current.ignores(ctx):
                         yield (filename, firstline, lineno + 1, ''.join(pieces))
                 else:
                     # this is an error of basefile
                     # (if matchers are implemented correctly)
                     errors.append(
                         '%s:%d: unexpected end of file for "%s"'
                         % (basefile, lineno, current.desc)
                     )
             # heredoc limit mark to ignore embedded code at check-code.py or so
             heredocignorelimit = 'NO_CHECK_EOF'
             # the pattern to match against cases below, and to return a limit mark
             # string as 'limit' group (the optional quote character around it is
             # captured as 'lquote' group, and must be closed by the same one)
             #
             # - << LIMITMARK
             # - << "LIMITMARK"
             # - << 'LIMITMARK'
             heredoclimitpat = r'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'
             class fileheredocmatcher(embeddedmatcher):
                 """Matcher for code embedded in "cat > FILE << LIMIT" style heredoc

                 'ctx' of this matcher is (filename, END-LINE-OF-EMBEDDED-CODE)
                 tuple.

                 >>> matcher = fileheredocmatcher('heredoc .py file', r'[^<]+\\.py')
                 >>> b2s(matcher.startsat('  $ cat > file.py << EOF\\n'))
                 ('file.py', '  > EOF\\n')
                 >>> b2s(matcher.startsat('  $ cat   >>file.py   <<EOF\\n'))
                 ('file.py', '  > EOF\\n')
                 >>> b2s(matcher.startsat('  $ cat>  \\x27any file.py\\x27<<  "EOF"\\n'))
                 ('any file.py', '  > EOF\\n')
                 >>> b2s(matcher.startsat("  $ cat > file.py << 'ANYLIMIT'\\n"))
                 ('file.py', '  > ANYLIMIT\\n')
                 >>> b2s(matcher.startsat('  $ cat<<ANYLIMIT>"file.py"\\n'))
                 ('file.py', '  > ANYLIMIT\\n')
                 >>> start = '  $ cat > file.py << EOF\\n'
                 >>> ctx = matcher.startsat(start)
                 >>> matcher.codeatstart(ctx, start)
                 >>> b2s(matcher.filename(ctx))
                 'file.py'
                 >>> matcher.ignores(ctx)
                 False
                 >>> inside = '  > foo = 1\\n'
                 >>> matcher.endsat(ctx, inside)
                 False
                 >>> matcher.isinside(ctx, inside)
                 True
                 >>> b2s(matcher.codeinside(ctx, inside))
                 'foo = 1\\n'
                 >>> end = '  > EOF\\n'
                 >>> matcher.endsat(ctx, end)
                 True
                 >>> matcher.codeatend(ctx, end)
                 >>> matcher.endsat(ctx, '  > EOFEOF\\n')
                 False
                 >>> ctx = matcher.startsat('  $ cat > file.py << NO_CHECK_EOF\\n')
                 >>> matcher.ignores(ctx)
                 True
                 """

                 # each line of heredoc body starts with this in *.t test script
                 _prefix = '  > '

                 def __init__(self, desc, namepat):
                     super().__init__(desc)

                     # the pattern of redirection part, which matches against
                     # cases below (and ">>" variants), and captures a target
                     # filename string as 'name' group
                     #
                     # - > NAMEPAT
                     # - > "NAMEPAT"
                     # - > 'NAMEPAT'
                     redirectpat = (
                         r'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)' % namepat
                     )
                     catpat = r' {2}\$ \s*cat'
                     self._fileres = [
                         # "cat > NAME << LIMIT" case
                         re.compile(catpat + redirectpat + heredoclimitpat),
                         # "cat << LIMIT > NAME" case
                         re.compile(catpat + heredoclimitpat + redirectpat),
                     ]

                 def startsat(self, line):
                     # supported forms are tried in order, and the first
                     # matching one decides ctx
                     for pattern in self._fileres:
                         found = pattern.match(line)
                         if found:
                             endline = '  > %s\n' % found.group('limit')
                             return (found.group('name'), endline)
                     return None

                 def endsat(self, ctx, line):
                     # embedded code ends only at the exact end line
                     return line == ctx[1]

                 def isinside(self, ctx, line):
                     return line.startswith(self._prefix)

                 def ignores(self, ctx):
                     ignoredend = '  > %s\n' % heredocignorelimit
                     return ignoredend == ctx[1]

                 def filename(self, ctx):
                     return ctx[0]

                 def codeatstart(self, ctx, line):
                     # the start line is a shell command, not embedded code
                     return None

                 def codeatend(self, ctx, line):
                     # the end line is a limit mark, not embedded code
                     return None

                 def codeinside(self, ctx, line):
                     # drop leading '  > '
                     offset = len(self._prefix)
                     return line[offset:]
             ####
             # for embedded python script
             class pydoctestmatcher(embeddedmatcher):
                 """Matcher for python code embedded in ">>> code" style

                 'ctx' of this matcher is just "True".

                 >>> matcher = pydoctestmatcher()
                 >>> startline = '  >>> foo = 1\\n'
                 >>> matcher.startsat(startline)
                 True
                 >>> matcher.startsat('  ... foo = 1\\n')
                 False
                 >>> ctx = matcher.startsat(startline)
                 >>> matcher.filename(ctx)
                 >>> matcher.ignores(ctx)
                 False
                 >>> b2s(matcher.codeatstart(ctx, startline))
                 'foo = 1\\n'
                 >>> inside = '  >>> foo = 1\\n'
                 >>> matcher.endsat(ctx, inside)
                 False
                 >>> matcher.isinside(ctx, inside)
                 True
                 >>> b2s(matcher.codeinside(ctx, inside))
                 'foo = 1\\n'
                 >>> inside = '  ... foo = 1\\n'
                 >>> matcher.endsat(ctx, inside)
                 False
                 >>> matcher.isinside(ctx, inside)
                 True
                 >>> b2s(matcher.codeinside(ctx, inside))
                 'foo = 1\\n'
                 >>> inside = '  expected output\\n'
                 >>> matcher.endsat(ctx, inside)
                 False
                 >>> matcher.isinside(ctx, inside)
                 True
                 >>> b2s(matcher.codeinside(ctx, inside))
                 '\\n'
                 >>> inside = '  \\n'
                 >>> matcher.endsat(ctx, inside)
                 False
                 >>> matcher.isinside(ctx, inside)
                 True
                 >>> b2s(matcher.codeinside(ctx, inside))
                 '\\n'
                 >>> end = '  $ foo bar\\n'
                 >>> matcher.endsat(ctx, end)
                 True
                 >>> matcher.codeatend(ctx, end)
                 >>> end = '\\n'
                 >>> matcher.endsat(ctx, end)
                 True
                 >>> matcher.codeatend(ctx, end)
                 """

                 _prefix = '  >>> '
                 _prefixre = re.compile(r' {2}(>>>|\.\.\.) ')

                 # A line, which matches against not _prefixre but _outputre, is
                 # "an expected output line" (= not a part of code fragment).
                 #
                 # Strictly speaking, run-tests.py treats a line matching against
                 # "(#if|#else|#endif)" similarly in "inline python code"
                 # semantics. But Mercurial reviewers should reject "directive
                 # line inside inline python code". Therefore, this regexp does
                 # not match against such directive lines.
                 _outputre = re.compile(r' {2}$| {2}[^$]')

                 def __init__(self):
                     super().__init__("doctest style python code")

                 def startsat(self, line):
                     # only '  >>> ' line can start a fragment; ctx is "True"
                     return line.startswith(self._prefix)

                 def endsat(self, ctx, line):
                     # a fragment goes on, while lines are code or expected output
                     if self._prefixre.match(line):
                         return False
                     return not self._outputre.match(line)

                 def isinside(self, ctx, line):
                     # always true, if not yet ended
                     return True

                 def ignores(self, ctx):
                     # should be checked always
                     return False

                 def filename(self, ctx):
                     # no filename
                     return None

                 def codeatstart(self, ctx, line):
                     # strip prefix '  >>> '/'  ... '
                     offset = len(self._prefix)
                     return line[offset:]

                 def codeatend(self, ctx, line):
                     # no embedded code at end line
                     return None

                 def codeinside(self, ctx, line):
                     if not self._prefixre.match(line):
                         # an expected output line is treated as an empty line
                         return '\n'
                     # strip prefix '  >>> '/'  ... '
                     offset = len(self._prefix)
                     return line[offset:]
             class pyheredocmatcher(embeddedmatcher):
                 """Matcher for python code embedded in "python << LIMIT" style

                 'ctx' of this matcher is END-LINE-OF-EMBEDDED-CODE.

                 >>> matcher = pyheredocmatcher()
                 >>> b2s(matcher.startsat('  $ python << EOF\\n'))
                 '  > EOF\\n'
                 >>> b2s(matcher.startsat('  $ $PYTHON   <<EOF\\n'))
                 '  > EOF\\n'
                 >>> b2s(matcher.startsat('  $ "$PYTHON"<<  "EOF"\\n'))
                 '  > EOF\\n'
                 >>> b2s(matcher.startsat("  $ $PYTHON << 'ANYLIMIT'\\n"))
                 '  > ANYLIMIT\\n'
                 >>> matcher.startsat('  $ "$PYTHON" < EOF\\n')
                 >>> start = '  $ python << EOF\\n'
                 >>> ctx = matcher.startsat(start)
                 >>> matcher.codeatstart(ctx, start)
                 >>> matcher.filename(ctx)
                 >>> matcher.ignores(ctx)
                 False
                 >>> inside = '  > foo = 1\\n'
                 >>> matcher.endsat(ctx, inside)
                 False
                 >>> matcher.isinside(ctx, inside)
                 True
                 >>> b2s(matcher.codeinside(ctx, inside))
                 'foo = 1\\n'
                 >>> end = '  > EOF\\n'
                 >>> matcher.endsat(ctx, end)
                 True
                 >>> matcher.codeatend(ctx, end)
                 >>> matcher.endsat(ctx, '  > EOFEOF\\n')
                 False
                 >>> ctx = matcher.startsat('  $ python << NO_CHECK_EOF\\n')
                 >>> matcher.ignores(ctx)
                 True
                 """

                 # each line of heredoc body starts with this in *.t test script
                 _prefix = '  > '

                 # python invocation (python, $PYTHON or "$PYTHON") with heredoc
                 _startre = re.compile(
                     r' {2}\$ (\$PYTHON|"\$PYTHON"|python).*' + heredoclimitpat
                 )

                 def __init__(self):
                     super().__init__("heredoc python invocation")

                 def startsat(self, line):
                     found = self._startre.match(line)
                     if not found:
                         return None
                     # the line, at which this heredoc ends, is used as ctx
                     return '  > %s\n' % found.group('limit')

                 def endsat(self, ctx, line):
                     # embedded code ends only at the exact end line
                     return line == ctx

                 def isinside(self, ctx, line):
                     return line.startswith(self._prefix)

                 def ignores(self, ctx):
                     ignoredend = '  > %s\n' % heredocignorelimit
                     return ignoredend == ctx

                 def filename(self, ctx):
                     # no filename
                     return None

                 def codeatstart(self, ctx, line):
                     # no embedded code at start line
                     return None

                 def codeatend(self, ctx, line):
                     # no embedded code at end line
                     return None

                 def codeinside(self, ctx, line):
                     # drop leading '  > '
                     offset = len(self._prefix)
                     return line[offset:]
             # matchers, which pyembedded() uses to detect embedded python code:
             # doctest style code, heredoc fed to python, and heredoc written
             # into *.py file
             _pymatchers = [
                 pydoctestmatcher(),
                 pyheredocmatcher(),
                 # use '[^<]+' instead of '\S+', in order to match against
                 # paths including whitespaces
                 fileheredocmatcher('heredoc .py file', r'[^<]+\.py'),
             ]
             def pyembedded(basefile, lines, errors):
                 """Pick embedded python code fragments up from given lines

                 This applies _pymatchers via "embedded()", and returns what
                 it returns.
                 """
                 fragments = embedded(basefile, lines, errors, _pymatchers)
                 return fragments
             ####
             # for embedded shell script
             # matchers, which shembedded() uses to detect embedded shell script
             # (heredoc written into *.sh file)
             _shmatchers = [
                 # use '[^<]+' instead of '\S+', in order to match against
                 # paths including whitespaces
                 fileheredocmatcher('heredoc .sh file', r'[^<]+\.sh'),
             ]
             def shembedded(basefile, lines, errors):
                 """Pick embedded shell script fragments up from given lines

                 This applies _shmatchers via "embedded()", and returns what
                 it returns.
                 """
                 fragments = embedded(basefile, lines, errors, _shmatchers)
                 return fragments
             ####
             # for embedded hgrc configuration
             # matchers passed to embedded() by hgrcembedded()
             _hgrcmatchers = [
                 # each directory component is matched by '[^/<]+' instead of
                 # '\S+', in order to match against paths including whitespaces.
                 # "$HGRCPATH" and "${HGRCPATH}" are accepted as a filename, too
                 fileheredocmatcher(
                     'heredoc hgrc file', r'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'
                 ),
             ]
             def hgrcembedded(basefile, lines, errors):
                 """Apply embedded() with the matchers listed in _hgrcmatchers

                 basefile, lines and errors are passed to embedded() as they
                 are, and the result of embedded() is returned as it is.
                 """
                 return embedded(basefile, lines, errors, _hgrcmatchers)
             ####
             if __name__ == "__main__":
                 import optparse
                 import sys
                 def showembedded(basefile, lines, embeddedfunc, opts):
                     """Write out start/end lines of fragments found by embeddedfunc

                     For each (name, starts, ends, code) yielded by embeddedfunc,
                     "BASEFILE:LINENO: NAME starts" and "... ends" lines are
                     written via writeout(). With opts.verbose, non-empty code is
                     also written, with "  |" put before each line of it.

                     Errors collected by embeddedfunc are written via writeerr(),
                     and the number of them is returned.
                     """
                     problems = []
                     for label, first, last, body in embeddedfunc(basefile, lines, problems):
                         # fragment without name is shown with a placeholder
                         label = label or '<anonymous>'
                         writeout("%s:%d: %s starts\n" % (basefile, first, label))
                         if opts.verbose and body:
                             writeout("  |%s\n" % "\n  |".join(body.splitlines()))
                         writeout("%s:%d: %s ends\n" % (basefile, last, label))
                     for problem in problems:
                         writeerr("%s\n" % problem)
                     return len(problems)
                 def applyembedded(args, embeddedfunc, opts):
                     """Run showembedded() on each file in args, or on stdin

                     stdin is read only if args is empty. 1 is returned if
                     showembedded() reports any error, or 0 otherwise.
                     """
                     if not args:
                         stdinlines = sys.stdin.readlines()
                         if showembedded('<stdin>', stdinlines, embeddedfunc, opts):
                             return 1
                         return 0
                     failed = 0
                     for path in args:
                         with opentext(path) as fp:
                             # keep going, in order to examine all of given files
                             if showembedded(path, fp, embeddedfunc, opts):
                                 failed = 1
                     return failed
                 # table of available commands: name => (description, function)
                 commands = {}
                 def command(name, desc):
                     """Return a decorator to register a function as command name

                     The decorated function is stored into ``commands`` as
                     (desc, function) under name, and is returned as it is.
                     """
                     def wrap(func):
                         commands[name] = (desc, func)
                         # return func, in order to keep the decorated name bound
                         # to the function itself instead of None
                         return func
                     return wrap
                 @command("pyembedded", "detect embedded python script")
                 def pyembeddedcmd(args, opts):
                     """Run applyembedded() with pyembedded, and return its result"""
                     return applyembedded(args, pyembedded, opts)
                 @command("shembedded", "detect embedded shell script")
                 def shembeddedcmd(args, opts):
                     """Run applyembedded() with shembedded, and return its result"""
                     return applyembedded(args, shembedded, opts)
                 @command("hgrcembedded", "detect embedded hgrc configuration")
                 def hgrcembeddedcmd(args, opts):
                     """Run applyembedded() with hgrcembedded, and return its result"""
                     return applyembedded(args, hgrcembedded, opts)
                 # "  - NAME: DESCRIPTION" lines, one per entry in commands
                 availablecommands = "\n".join(
                     ["  - %s: %s" % (key, value[0]) for key, value in commands.items()]
                 )
                 # usage text consists of fixed explanation and the list of commands
                 parser = optparse.OptionParser(
                     """%prog COMMAND [file ...]
             Pick up embedded code fragments from given file(s) or stdin, and list
             up start/end lines of them in standard compiler format
             ("FILENAME:LINENO:").
             Available commands are:
             """
                     + availablecommands
                     + """
             """
                 )
                 # opts.verbose is examined by showembedded()
                 parser.add_option(
                     "-v",
                     "--verbose",
                     help="enable additional output (e.g. actual code)",
                     action="store_true",
                 )
                 (opts, args) = parser.parse_args()
                 # the first argument should be one of names in commands
                 if not args or args[0] not in commands:
                     parser.print_help()
                     sys.exit(255)
                 # invoke the command function with the rest of arguments, and use
                 # the value returned by it as exit code
                 sys.exit(commands[args[0]][1](args[1:], opts))