upstream/mercurial-mirror Commit - r11192:babf9a5f

1

# minirst.py - minimal reStructuredText parser

1

# minirst.py - minimal reStructuredText parser

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

"""simplified reStructuredText parser.

8

"""simplified reStructuredText parser.

9

10

This parser knows just enough about reStructuredText to parse the

10

This parser knows just enough about reStructuredText to parse the

11

Mercurial docstrings.

11

Mercurial docstrings.

12

13

It cheats in a major way: nested blocks are not really nested. They

13

It cheats in a major way: nested blocks are not really nested. They

14

are just indented blocks that look like they are nested. This relies

14

are just indented blocks that look like they are nested. This relies

15

on the user to keep the right indentation for the blocks.

15

on the user to keep the right indentation for the blocks.

16

17

It only supports a small subset of reStructuredText:

17

It only supports a small subset of reStructuredText:

18

19

- sections

19

- sections

20

21

- paragraphs

21

- paragraphs

22

23

- literal blocks

23

- literal blocks

24

25

- definition lists

25

- definition lists

26

27

- bullet lists (items must start with '-')

27

- bullet lists (items must start with '-')

28

29

- enumerated lists (no autonumbering)

29

- enumerated lists (no autonumbering)

30

31

- field lists (colons cannot be escaped)

31

- field lists (colons cannot be escaped)

32

33

- option lists (supports only long options without arguments)

33

- option lists (supports only long options without arguments)

34

35

- inline literals (no other inline markup is recognized)

35

- inline literals (no other inline markup is recognized)

36

"""

36

"""

37

38

import re, sys, textwrap

38

import re, sys, textwrap

39

40

41

def findblocks(text):
    """Find continuous blocks of lines in text.

    Lines are grouped into blocks separated by one or more blank
    (whitespace-only) lines.

    Returns a list of dictionaries representing the blocks. Each block
    has an 'indent' field (the smallest leading-whitespace width among
    its lines) and a 'lines' field (the lines with that common indent
    stripped). An empty or all-blank text yields an empty list.
    """
    blocks = [[]]
    for line in text.splitlines():
        if line.strip():
            blocks[-1].append(line)
        elif blocks[-1]:
            # A blank line closes the current block; consecutive blank
            # lines do not open additional empty blocks.
            blocks.append([])
    if not blocks[-1]:
        # Drop the trailing empty block left by trailing blank lines
        # (or by an empty input).
        del blocks[-1]

    for i, block in enumerate(blocks):
        indent = min((len(l) - len(l.lstrip())) for l in block)
        blocks[i] = dict(indent=indent, lines=[l[indent:] for l in block])
    return blocks

61

62

63

def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'. The list is modified in place (a lone
    '::' block is removed, trailing '::' markers are rewritten) and
    returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block
                del blocks[i]
                i -= 1
            elif blocks[i]['lines'][-1].endswith(' ::'):
                # Partially minimized form: remove space and both
                # colons.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
            else:
                # Fully minimized form: remove just one colon.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            #
            # If the removed '::' block was the very first block, i is
            # now -1 and there is no preceding block to inspect;
            # indexing blocks[-1] would wrongly look at the last block.
            m = None
            if i >= 0:
                m = _bulletre.match(blocks[i]['lines'][0])
            if m:
                indent += m.end()
                adjustment -= m.end()

            # Mark the following indented blocks.
            while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                blocks[i + 1]['type'] = 'literal'
                blocks[i + 1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks

113

114

_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|$?[0-9A-Za-z]+$|\|) ')

114

_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|$?[0-9A-Za-z]+$|\|) ')

115

_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')

115

_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')

116

_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')

116

_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')

117

_definitionre = re.compile(r'[^ ]')

117

_definitionre = re.compile(r'[^ ]')

118

119

def splitparagraphs(blocks):
    """Split paragraphs into lists.

    Every 'paragraph' block whose first line starts a list item is
    replaced, in place, by one block per item. The new blocks get the
    list type ('bullet', 'option', 'field' or 'definition') and inherit
    the indent of the paragraph. Returns the same list object.
    """
    # Tuples with (list type, item regexp, single line items?). Order
    # matters: definition lists has the least specific regexp and must
    # come last.
    listtypes = [('bullet', _bulletre, True),
                 ('option', _optionre, True),
                 ('field', _fieldre, True),
                 ('definition', _definitionre, False)]

    def match(lines, i, itemre, singleline):
        """Does itemre match an item at line i?

        A list item can be followed by an indented line or another list
        item (but only if singleline is True).
        """
        line1 = lines[i]
        if i + 1 < len(lines):
            line2 = lines[i + 1]
        else:
            line2 = ''
        if not itemre.match(line1):
            return False
        if singleline:
            return line2 == '' or line2[0] == ' ' or itemre.match(line2)
        else:
            return line2.startswith(' ')

    i = 0
    while i < len(blocks):
        if blocks[i]['type'] == 'paragraph':
            lines = blocks[i]['lines']
            for listtype, itemre, singleline in listtypes:
                if match(lines, 0, itemre, singleline):
                    items = []
                    for j, line in enumerate(lines):
                        if match(lines, j, itemre, singleline):
                            # A new item starts here; later lines are
                            # appended to it until the next item.
                            items.append(dict(type=listtype, lines=[],
                                              indent=blocks[i]['indent']))
                        items[-1]['lines'].append(line)
                    blocks[i:i + 1] = items
                    break
        i += 1
    return blocks

160

161

162

_fieldwidth = 12

162

_fieldwidth = 12

163

164

def updatefieldlists(blocks):
    """Find key and maximum key width for field lists.

    For each run of consecutive 'field' blocks, the ':key:' prefix is
    split off the first line of every block and stored under 'key', and
    the length of the longest key in the run is stored under 'keywidth'
    on every block of that run. The blocks are modified in place and
    the list is returned.
    """
    i = 0
    while i < len(blocks):
        if blocks[i]['type'] != 'field':
            i += 1
            continue

        keywidth = 0
        j = i
        while j < len(blocks) and blocks[j]['type'] == 'field':
            m = _fieldre.match(blocks[j]['lines'][0])
            key, rest = m.groups()
            blocks[j]['lines'][0] = rest
            blocks[j]['key'] = key
            keywidth = max(keywidth, len(key))
            j += 1

        for block in blocks[i:j]:
            block['keywidth'] = keywidth
        # blocks[j] (if any) is known not to be a field, so it can be
        # skipped as well.
        i = j + 1

    return blocks

187

188

189

def prunecontainers(blocks, keep):
    """Prune unwanted containers.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A ".. container:: type" directive block is always removed. The more
    deeply indented blocks that follow it are removed too unless the
    container type is listed in keep; for kept containers they are
    dedented instead. The list is modified in place.

    Returns a (blocks, pruned) tuple where pruned lists the container
    types that were removed, in order of appearance.
    """
    pruned = []
    i = 0
    while i + 1 < len(blocks):
        # Searching for a block that looks like this:
        #
        # +-------+---------------------------+
        # | ".. container ::" type            |
        # +---+                               |
        #     | blocks                        |
        #     +-------------------------------+
        if (blocks[i]['type'] == 'paragraph' and
            blocks[i]['lines'][0].startswith('.. container::')):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent
            containertype = blocks[i]['lines'][0][15:]
            prune = containertype not in keep
            if prune:
                pruned.append(containertype)

            # Always delete "..container:: type" block
            del blocks[i]
            j = i
            # Step back once so that the increment at the end of the
            # outer loop re-examines index i, which now holds either the
            # first kept inner block (possibly a nested container) or
            # the first block after a pruned container. Decrementing
            # once per pruned block instead could drive the index
            # negative.
            i -= 1
            while j < len(blocks) and blocks[j]['indent'] > indent:
                if prune:
                    del blocks[j]
                else:
                    blocks[j]['indent'] -= adjustment
                    j += 1
        i += 1
    return blocks, pruned

226

227

228

# A section underline: one punctuation character repeated at least twice.
_sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")

def findsections(blocks):
    """Finds sections.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A two-line paragraph whose second line is an underline of the same
    length as the first line becomes a 'section' block: the underline
    character is stored under 'underline' and the underline line is
    removed. Blocks are modified in place and the list is returned.
    """
    for block in blocks:
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | Section title                |
        # | -------------                |
        # +------------------------------+
        if (block['type'] == 'paragraph' and
            len(block['lines']) == 2 and
            len(block['lines'][0]) == len(block['lines'][1]) and
            _sectionre.match(block['lines'][1])):
            block['underline'] = block['lines'][1][0]
            block['type'] = 'section'
            del block['lines'][1]
    return blocks

251

252

253

def inlineliterals(blocks):
    """Replace inline literal markers in paragraphs and sections.

    Every '``' in the lines of 'paragraph' and 'section' blocks is
    replaced by a double quote. Other block types are left untouched.
    The blocks are modified in place and the list is returned.
    """
    for b in blocks:
        if b['type'] in ('paragraph', 'section'):
            b['lines'] = [l.replace('``', '"') for l in b['lines']]
    return blocks

258

259

260

def hgrole(blocks):
    """Rewrite the :hg:`command` role in paragraphs and sections.

    In 'paragraph' and 'section' blocks, ':hg:`command`' becomes
    '"hg command"'. Other block types are left untouched. The blocks
    are modified in place and the list is returned.
    """
    for b in blocks:
        if b['type'] in ('paragraph', 'section'):
            # Turn :hg:`command` into "hg command". This also works
            # when there is a line break in the command and relies on
            # the fact that we have no stray back-quotes in the input
            # (run the blocks through inlineliterals first).
            b['lines'] = [l.replace(':hg:`', '"hg ').replace('`', '"')
                          for l in b['lines']]
    return blocks

267

270

268

271

269

def addmargins(blocks):
    """Adds empty blocks for vertical spacing.

    This groups bullets, options, and fields together with no vertical
    space between them, and adds an empty 'margin' block between all
    other blocks. The list is modified in place and returned.
    """
    i = 1
    while i < len(blocks):
        if (blocks[i]['type'] == blocks[i - 1]['type'] and
            blocks[i]['type'] in ('bullet', 'option', 'field')):
            i += 1
        else:
            blocks.insert(i, dict(lines=[''], indent=0, type='margin'))
            # Skip both the margin just inserted and the block after it.
            i += 2
    return blocks

284

287

285

288

286

def formatblock(block, width):
    """Format a block according to width.

    block is a dictionary with at least 'type', 'indent' and 'lines'
    fields ('section' blocks also need 'underline', 'field' blocks
    'key' and 'keywidth'). A width of zero or less selects the default
    of 78 columns. Returns the formatted text as a string; for 'bullet'
    and 'field' blocks the first line of the block is rewritten in
    place as a side effect.
    """
    if width <= 0:
        width = 78
    indent = ' ' * block['indent']
    if block['type'] == 'margin':
        return ''
    if block['type'] == 'literal':
        # Literal blocks are emitted verbatim, with extra indentation.
        # NOTE(review): runs of spaces appear to have been collapsed in
        # the copy this was recovered from; confirm whether the extra
        # indent should be two spaces.
        indent += ' '
        return indent + ('\n' + indent).join(block['lines'])
    if block['type'] == 'section':
        underline = len(block['lines'][0]) * block['underline']
        return "%s%s\n%s%s" % (indent, block['lines'][0], indent, underline)
    if block['type'] == 'definition':
        term = indent + block['lines'][0]
        # The definition keeps the hanging indent of its last line.
        hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())
        defindent = indent + hang * ' '
        text = ' '.join(map(str.strip, block['lines'][1:]))
        return "%s\n%s" % (term, textwrap.fill(text, width=width,
                                               initial_indent=defindent,
                                               subsequent_indent=defindent))
    subindent = indent
    if block['type'] == 'bullet':
        if block['lines'][0].startswith('| '):
            # Remove bullet for line blocks and add no extra
            # indention.
            block['lines'][0] = block['lines'][0][2:]
        else:
            m = _bulletre.match(block['lines'][0])
            subindent = indent + m.end() * ' '
    elif block['type'] == 'field':
        keywidth = block['keywidth']
        key = block['key']

        subindent = indent + _fieldwidth * ' '
        if len(key) + 2 > _fieldwidth:
            # key too large, use full line width
            key = key.ljust(width)
        elif keywidth + 2 < _fieldwidth:
            # all keys are small, add only two spaces
            key = key.ljust(keywidth + 2)
            subindent = indent + (keywidth + 2) * ' '
        else:
            # mixed sizes, use fieldwidth for this one
            key = key.ljust(_fieldwidth)
        block['lines'][0] = key + block['lines'][0]
    elif block['type'] == 'option':
        m = _optionre.match(block['lines'][0])
        option, arg, rest = m.groups()
        subindent = indent + (len(option) + len(arg)) * ' '

    text = ' '.join(map(str.strip, block['lines']))
    return textwrap.fill(text, width=width,
                         initial_indent=indent,
                         subsequent_indent=subindent)

341

344

342

345

343

def format(text, width, indent=0, keep=None):
    """Parse and format the text according to width.

    indent is added to the indentation of every block found in text.
    keep lists the container types whose content should be retained;
    other containers are pruned.

    Returns the formatted text when keep is None, otherwise a
    (text, pruned) tuple where pruned lists the container types that
    were removed.
    """
    blocks = findblocks(text)
    for b in blocks:
        b['indent'] += indent
    blocks = findliteralblocks(blocks)
    blocks, pruned = prunecontainers(blocks, keep or [])
    blocks = findsections(blocks)
    blocks = inlineliterals(blocks)
    blocks = hgrole(blocks)
    blocks = splitparagraphs(blocks)
    blocks = updatefieldlists(blocks)
    blocks = addmargins(blocks)
    text = '\n'.join(formatblock(b, width) for b in blocks)
    if keep is None:
        return text
    else:
        return text, pruned

361

364

362

365

363

if __name__ == "__main__":
    # Debugging aid: parse the file named by the first command line
    # argument, dump the block list after every pass, and print the
    # result formatted to 30 columns. Remaining arguments are the
    # container types to keep.
    from pprint import pprint

    def debug(func, *args):
        """Run one parser pass and dump its result before returning it."""
        blocks = func(*args)
        print("*** after %s:" % func.__name__)
        pprint(blocks)
        print("")
        return blocks

    fp = open(sys.argv[1])
    try:
        text = fp.read()
    finally:
        # Close the input file even if reading fails.
        fp.close()
    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    blocks, pruned = debug(prunecontainers, blocks, sys.argv[2:])
    blocks = debug(inlineliterals, blocks)
    blocks = debug(splitparagraphs, blocks)
    blocks = debug(updatefieldlists, blocks)
    blocks = debug(findsections, blocks)
    blocks = debug(addmargins, blocks)
    print('\n'.join(formatblock(b, 30) for b in blocks))

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # minirst.py - minimal reStructuredText parser
             #
             # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """simplified reStructuredText parser.
             This parser knows just enough about reStructuredText to parse the
             Mercurial docstrings.
             It cheats in a major way: nested blocks are not really nested. They
             are just indented blocks that look like they are nested. This relies
             on the user to keep the right indentation for the blocks.
             It only supports a small subset of reStructuredText:
             - sections
             - paragraphs
             - literal blocks
             - definition lists
             - bullet lists (items must start with '-')
             - enumerated lists (no autonumbering)
             - field lists (colons cannot be escaped)
             - option lists (supports only long options without arguments)
             - inline literals (no other inline markup is recognized)
             """
             import re, sys, textwrap
             def findblocks(text):
                 """Split text into blocks of consecutive non-blank lines.
                 Blank (or whitespace-only) lines separate blocks and are dropped.
                 Returns a list of dictionaries, one per block. Each has an
                 'indent' field (the smallest leading-whitespace width found in
                 the block) and a 'lines' field (the block's lines with that
                 many leading characters removed).
                 """
                 groups = []
                 current = []
                 for line in text.splitlines():
                     if line.strip():
                         current.append(line)
                     elif current:
                         # A blank line closes the block collected so far.
                         groups.append(current)
                         current = []
                 if current:
                     groups.append(current)
                 result = []
                 for group in groups:
                     margin = min(len(l) - len(l.lstrip()) for l in group)
                     result.append(dict(indent=margin,
                                        lines=[l[margin:] for l in group]))
                 return result
             def findliteralblocks(blocks):
                 """Finds literal blocks and adds a 'type' field to the blocks.
                 Literal blocks are given the type 'literal', all other blocks are
                 given the type 'paragraph'.
                 A paragraph whose last line ends with '::' introduces literal
                 blocks: the more-indented blocks that follow it are marked as
                 'literal' and the '::' marker is removed from the paragraph (a
                 paragraph consisting only of '::' is deleted entirely).
                 The list is modified in place and also returned.
                 """
                 i = 0
                 while i < len(blocks):
                     # Searching for a block that looks like this:
                     #
                     # +------------------------------+
                     # | paragraph                    |
                     # | (ends with "::")             |
                     # +------------------------------+
                     #    +---------------------------+
                     #    | indented literal block    |
                     #    +---------------------------+
                     blocks[i]['type'] = 'paragraph'
                     if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):
                         indent = blocks[i]['indent']
                         # Extra indentation of the first following block relative
                         # to the introducing paragraph; it is subtracted from
                         # every block marked as literal below.
                         adjustment = blocks[i + 1]['indent'] - indent
                         if blocks[i]['lines'] == ['::']:
                             # Expanded form: remove block
                             del blocks[i]
                             # Step back so that blocks[i + 1] is the block that
                             # followed the removed '::' paragraph.
                             # NOTE(review): if the '::' paragraph was the first
                             # block, i is now -1 and the blocks[i] lookup below
                             # reads the last block of the list - confirm intent.
                             i -= 1
                         elif blocks[i]['lines'][-1].endswith(' ::'):
                             # Partially minimized form: remove space and both
                             # colons.
                             blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
                         else:
                             # Fully minimized form: remove just one colon.
                             blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]
                         # List items are formatted with a hanging indent. We must
                         # correct for this here while we still have the original
                         # information on the indentation of the subsequent literal
                         # blocks available.
                         m = _bulletre.match(blocks[i]['lines'][0])
                         if m:
                             indent += m.end()
                             adjustment -= m.end()
                         # Mark the following indented blocks.
                         while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                             blocks[i + 1]['type'] = 'literal'
                             blocks[i + 1]['indent'] -= adjustment
                             i += 1
                     i += 1
                 return blocks
             # Item markers, one regexp per list type. Each is matched against the
             # start of a line.
             # Bullet/enumerator: '-', 'x.', 'x)', '(x)' or '|' followed by a space.
             _bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')
             # Long option, optional argument, at least two spaces, description.
             _optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)?  +)(.*)$')
             # ':key:' followed by spaces and the field body.
             _fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')
             # Any line that does not start with a space.
             _definitionre = re.compile(r'[^ ]')
             def splitparagraphs(blocks):
                 """Replace paragraphs that are really lists by one block per item.
                 Only blocks of type 'paragraph' are inspected. When the first line
                 of a paragraph starts a list item, the paragraph is replaced in
                 the list by item blocks of type 'bullet', 'option', 'field' or
                 'definition', each keeping the paragraph's indent. The list is
                 modified in place and also returned.
                 """
                 # (list type, item regexp, single line items?). The first entry
                 # that matches wins, so the catch-all definition regexp is last.
                 listtypes = [('bullet', _bulletre, True),
                              ('option', _optionre, True),
                              ('field', _fieldre, True),
                              ('definition', _definitionre, False)]
                 def startsitem(lines, pos, itemre, singleline):
                     """Tell whether lines[pos] begins a list item.
                     The line must match itemre and be followed by an indented
                     line; when singleline is true it may instead be the last
                     line or be followed by another item.
                     """
                     if not itemre.match(lines[pos]):
                         return False
                     if pos + 1 < len(lines):
                         following = lines[pos + 1]
                     else:
                         following = ''
                     if not singleline:
                         return following.startswith(' ')
                     return (following == '' or following[0] == ' '
                             or itemre.match(following))
                 i = 0
                 while i < len(blocks):
                     block = blocks[i]
                     if block['type'] == 'paragraph':
                         lines = block['lines']
                         for listtype, itemre, singleline in listtypes:
                             if not startsitem(lines, 0, itemre, singleline):
                                 continue
                             items = []
                             for j, line in enumerate(lines):
                                 if startsitem(lines, j, itemre, singleline):
                                     items.append(dict(type=listtype, lines=[],
                                                       indent=block['indent']))
                                 # Lines that do not start an item continue the
                                 # most recent one.
                                 items[-1]['lines'].append(line)
                             blocks[i:i + 1] = items
                             break
                     i += 1
                 return blocks
             # Column width reserved for the key of a field list item; formatblock
             # reads it when padding keys and indenting continuation lines.
             _fieldwidth = 12
             def updatefieldlists(blocks):
                 """Extract field keys and record the widest key of each field list.
                 For every run of consecutive 'field' blocks, the ':key:' prefix is
                 stripped from the first line of each block and stored under
                 'key', and every block of the run gets a 'keywidth' entry holding
                 the length of the longest key in that run. The list is modified
                 in place and also returned.
                 """
                 pos = 0
                 total = len(blocks)
                 while pos < total:
                     if blocks[pos]['type'] != 'field':
                         pos += 1
                         continue
                     end = pos
                     widest = 0
                     while end < total and blocks[end]['type'] == 'field':
                         field = blocks[end]
                         key, rest = _fieldre.match(field['lines'][0]).groups()
                         field['lines'][0] = rest
                         field['key'] = key
                         widest = max(widest, len(key))
                         end += 1
                     for field in blocks[pos:end]:
                         field['keywidth'] = widest
                     # blocks[end] (if any) is known not to be a field: skip it.
                     pos = end + 1
                 return blocks
             def prunecontainers(blocks, keep):
                 """Prune unwanted containers.
                 A container is introduced by a paragraph starting with
                 ".. container:: <type>"; the more-indented blocks following it are
                 its content. The directive block itself is always removed. When
                 <type> is listed in keep, the content is kept and dedented to the
                 directive's level; otherwise the content is removed and <type> is
                 recorded as pruned.
                 The blocks must have a 'type' field, i.e., they should have been
                 run through findliteralblocks first.
                 Returns a (blocks, pruned) tuple: blocks is the input list
                 (modified in place) and pruned is the list of container types
                 whose content was removed.
                 """
                 pruned = []
                 i = 0
                 while i + 1 < len(blocks):
                     # Searching for a block that looks like this:
                     #
                     # +-------+---------------------------+
                     # | ".. container ::" type            |
                     # +---+                               |
                     #     | blocks                        |
                     #     +-------------------------------+
                     block = blocks[i]
                     if not (block['type'] == 'paragraph' and
                             block['lines'][0].startswith('.. container::')):
                         i += 1
                         continue
                     indent = block['indent']
                     adjustment = blocks[i + 1]['indent'] - indent
                     # Text following the ".. container:: " prefix.
                     containertype = block['lines'][0][15:]
                     prune = containertype not in keep
                     if prune:
                         pruned.append(containertype)
                     # Always delete "..container:: type" block
                     del blocks[i]
                     j = i
                     while j < len(blocks) and blocks[j]['indent'] > indent:
                         if prune:
                             del blocks[j]
                         else:
                             blocks[j]['indent'] -= adjustment
                             j += 1
                     # i is deliberately left unchanged: whatever block now sits
                     # at position i (the first kept content block, or the block
                     # after the pruned content) has not been examined yet and
                     # may itself be a container directive. At least one block
                     # was deleted above, so the loop still terminates.
                 return blocks, pruned
             # A section underline: one punctuation character repeated at least twice.
             _sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")
             def findsections(blocks):
                 """Turn underlined titles into 'section' blocks.
                 A two-line paragraph whose second line is an underline of the same
                 length as the first becomes a block of type 'section'; the
                 underline character is stored under 'underline' and the underline
                 itself is dropped from 'lines'.
                 The blocks must have a 'type' field, i.e., they should have been
                 run through findliteralblocks first. The list is modified in
                 place and also returned.
                 """
                 for block in blocks:
                     # Looking for:
                     #
                     #   Section title
                     #   -------------
                     if block['type'] != 'paragraph':
                         continue
                     lines = block['lines']
                     if len(lines) != 2:
                         continue
                     title, underline = lines
                     if len(title) == len(underline) and _sectionre.match(underline):
                         block['underline'] = underline[0]
                         block['type'] = 'section'
                         del lines[1]
                 return blocks
             def inlineliterals(blocks):
                 """Replace inline literal markers by double quotes.
                 In every 'paragraph' and 'section' block each occurrence of two
                 consecutive back-quotes is replaced by a single '"'. Other block
                 types are left alone. Returns the same list.
                 """
                 for block in blocks:
                     if block['type'] not in ('paragraph', 'section'):
                         continue
                     quoted = []
                     for line in block['lines']:
                         quoted.append(line.replace('``', '"'))
                     block['lines'] = quoted
                 return blocks
-            _hgrolere = re.compile(r':hg:`([^`]+)`')
             def hgrole(blocks):
                 for b in blocks:
                     if b['type'] in ('paragraph', 'section'):
-                        b['lines'] = [_hgrolere.sub(r'"hg \1"', l) for l in b['lines']]
+                        # Turn :hg:`command` into "hg command". This also works
+                        # when there is a line break in the command and relies on
+                        # the fact that we have no stray back-quotes in the input
+                        # (run the blocks through inlineliterals first).
+                        b['lines'] = [l.replace(':hg:`', '"hg ').replace('`', '"')
+                                      for l in b['lines']]
                 return blocks
             def addmargins(blocks):
                 """Insert empty 'margin' blocks for vertical spacing.
                 Consecutive blocks of the same type are kept together without a
                 margin when that type is 'bullet', 'option' or 'field'; a margin
                 block is inserted between every other pair of neighbouring
                 blocks. The list is modified in place and also returned.
                 """
                 compact = ('bullet', 'option', 'field')
                 pos = 1
                 while pos < len(blocks):
                     kind = blocks[pos]['type']
                     if kind in compact and kind == blocks[pos - 1]['type']:
                         pos += 1
                         continue
                     blocks.insert(pos, dict(lines=[''], indent=0, type='margin'))
                     # Skip both the new margin and the block it precedes.
                     pos += 2
                 return blocks
             def formatblock(block, width):
                 """Format a block according to width.
                 block is a dictionary with 'type', 'indent' and 'lines' entries
                 ('section' blocks also need 'underline'; 'field' blocks also need
                 'key' and 'keywidth'). width is the maximum line width; a value
                 of zero or less selects 78.
                 Returns the formatted text. Margins give an empty string, literal
                 blocks are emitted line by line with two extra spaces of indent,
                 sections are emitted with their underline, and all other blocks
                 are re-wrapped with textwrap.fill using a hanging indent suited
                 to the block type.
                 The block is not modified, so it can be formatted repeatedly.
                 """
                 if width <= 0:
                     width = 78
                 indent = ' ' * block['indent']
                 blocktype = block['type']
                 # Work on a copy: the bullet and field branches below rewrite the
                 # first line, and doing that on the caller's list would make a
                 # second call see an already-rewritten block.
                 lines = list(block['lines'])
                 if blocktype == 'margin':
                     return ''
                 if blocktype == 'literal':
                     indent += '  '
                     return indent + ('\n' + indent).join(lines)
                 if blocktype == 'section':
                     underline = len(lines[0]) * block['underline']
                     return "%s%s\n%s%s" % (indent, lines[0], indent, underline)
                 if blocktype == 'definition':
                     term = indent + lines[0]
                     # The definition body keeps the indentation of its last line.
                     hang = len(lines[-1]) - len(lines[-1].lstrip())
                     defindent = indent + hang * ' '
                     text = ' '.join(l.strip() for l in lines[1:])
                     return "%s\n%s" % (term, textwrap.fill(text, width=width,
                                                            initial_indent=defindent,
                                                            subsequent_indent=defindent))
                 subindent = indent
                 if blocktype == 'bullet':
                     if lines[0].startswith('| '):
                         # Line block: drop the marker and use no hanging indent.
                         lines[0] = lines[0][2:]
                     else:
                         # Continuation lines line up with the text after the
                         # bullet.
                         m = _bulletre.match(lines[0])
                         subindent = indent + m.end() * ' '
                 elif blocktype == 'field':
                     keywidth = block['keywidth']
                     key = block['key']
                     subindent = indent + _fieldwidth * ' '
                     if len(key) + 2 > _fieldwidth:
                         # key too large, use full line width
                         key = key.ljust(width)
                     elif keywidth + 2 < _fieldwidth:
                         # all keys are small, add only two spaces
                         key = key.ljust(keywidth + 2)
                         subindent = indent + (keywidth + 2) * ' '
                     else:
                         # mixed sizes, use fieldwidth for this one
                         key = key.ljust(_fieldwidth)
                     lines[0] = key + lines[0]
                 elif blocktype == 'option':
                     # Continuation lines line up with the option description.
                     m = _optionre.match(lines[0])
                     subindent = indent + (len(m.group(1)) + len(m.group(2))) * ' '
                 text = ' '.join(l.strip() for l in lines)
                 return textwrap.fill(text, width=width,
                                      initial_indent=indent,
                                      subsequent_indent=subindent)
             def format(text, width, indent=0, keep=None):
                 """Parse text and return it formatted for the given width.
                 indent is added to the indentation of every block. keep lists the
                 container types whose content must be preserved. When keep is
                 None only the formatted text is returned; otherwise a
                 (text, pruned) tuple is returned, pruned being the list of
                 container types that were removed.
                 """
                 blocks = findblocks(text)
                 for block in blocks:
                     block['indent'] += indent
                 blocks = findliteralblocks(blocks)
                 blocks, pruned = prunecontainers(blocks, keep or [])
                 # Remaining passes all map a block list to a block list.
                 passes = (findsections, inlineliterals, hgrole, splitparagraphs,
                           updatefieldlists, addmargins)
                 for step in passes:
                     blocks = step(blocks)
                 formatted = '\n'.join([formatblock(block, width)
                                        for block in blocks])
                 if keep is not None:
                     return formatted, pruned
                 return formatted
             # Debugging entry point: parse the file named by the first command line
             # argument one pass at a time, dumping the block list after every pass,
             # then print the result formatted for a width of 30. Any further
             # arguments are passed to prunecontainers as the container types to keep.
             if __name__ == "__main__":
                 from pprint import pprint
                 def debug(func, *args):
                     """Call func(*args), pretty-print its result and return it."""
                     blocks = func(*args)
                     print "*** after %s:" % func.__name__
                     pprint(blocks)
                     print
                     return blocks
                 text = open(sys.argv[1]).read()
                 # NOTE(review): this sequence differs from format(): hgrole is not
                 # run here and findsections runs after updatefieldlists instead of
                 # right after prunecontainers - confirm whether that is intended.
                 blocks = debug(findblocks, text)
                 blocks = debug(findliteralblocks, blocks)
                 blocks, pruned = debug(prunecontainers, blocks, sys.argv[2:])
                 blocks = debug(inlineliterals, blocks)
                 blocks = debug(splitparagraphs, blocks)
                 blocks = debug(updatefieldlists, blocks)
                 blocks = debug(findsections, blocks)
                 blocks = debug(addmargins, blocks)
                 print '\n'.join(formatblock(b, 30) for b in blocks)