upstream/mercurial-mirror Commit - r15014:a814e986

1

# minirst.py - minimal reStructuredText parser

1

# minirst.py - minimal reStructuredText parser

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

"""simplified reStructuredText parser.

8

"""simplified reStructuredText parser.

9

10

This parser knows just enough about reStructuredText to parse the

10

This parser knows just enough about reStructuredText to parse the

11

Mercurial docstrings.

11

Mercurial docstrings.

12

13

It cheats in a major way: nested blocks are not really nested. They

13

It cheats in a major way: nested blocks are not really nested. They

14

are just indented blocks that look like they are nested. This relies

14

are just indented blocks that look like they are nested. This relies

15

on the user to keep the right indentation for the blocks.

15

on the user to keep the right indentation for the blocks.

16

17

Remember to update http://mercurial.selenic.com/wiki/HelpStyleGuide

17

Remember to update http://mercurial.selenic.com/wiki/HelpStyleGuide

18

when adding support for new constructs.

18

when adding support for new constructs.

19

"""

19

"""

20

21

import re, sys

21

import re, sys

22

import util, encoding

22

import util, encoding

23

from i18n import _

23

from i18n import _

24

25

26

def replace(text, substs):
    """Apply substring substitutions to an encoded string.

    text is decoded using encoding.encoding, each (old, new) pair in
    substs is applied in order to the decoded text, and the result is
    encoded again with encoding.encoding before being returned.
    """
    decoded = text.decode(encoding.encoding)
    for old, new in substs:
        decoded = decoded.replace(old, new)
    return decoded.encode(encoding.encoding)

31

32

33

# A block separator: a newline followed by one or more blank
# (whitespace-only) lines.
_blockre = re.compile(r"\n(?:\s*\n)+")

def findblocks(text):
    """Find continuous blocks of lines in text.

    Returns a list of dictionaries representing the blocks. Each block
    has an 'indent' field and a 'lines' field. The leading whitespace
    shared by all lines of a block is removed from the lines and its
    width is stored in 'indent'.

    Empty or whitespace-only text yields an empty list.
    """
    blocks = []
    for chunk in _blockre.split(text.strip()):
        lines = chunk.splitlines()
        if not lines:
            # Splitting empty text produces a single empty chunk; min()
            # below would raise ValueError on its empty line list.
            continue
        indent = min((len(l) - len(l.lstrip())) for l in lines)
        lines = [l[indent:] for l in lines]
        blocks.append(dict(indent=indent, lines=lines))
    return blocks

48

49

50

def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'.

    A paragraph whose last line ends with "::" marks the following,
    more deeply indented blocks as literal. The "::" marker itself is
    removed: a block consisting only of "::" is deleted, a trailing
    " ::" is dropped entirely, and otherwise a single colon is removed.
    The indentation of the literal blocks is reduced so that it is
    relative to the introducing paragraph.

    The list is modified in place and also returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a paragraph ending with "::" that is followed
        # by at least one more block; the more deeply indented blocks
        # after it are the literal blocks.
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block
                del blocks[i]
                i -= 1
            elif blocks[i]['lines'][-1].endswith(' ::'):
                # Partially minimized form: remove space and both
                # colons.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
            else:
                # Fully minimized form: remove just one colon.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            #
            # i is -1 when the deleted "::" marker was the very first
            # block. blocks[-1] would then silently inspect the *last*
            # block of the document, so there is nothing to correct for.
            if i >= 0:
                m = _bulletre.match(blocks[i]['lines'][0])
                if m:
                    indent += m.end()
                    adjustment -= m.end()

            # Mark the following indented blocks.
            while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                blocks[i + 1]['type'] = 'literal'
                blocks[i + 1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks

100

101

_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|$?[0-9A-Za-z]+$|\|) ')

101

_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|$?[0-9A-Za-z]+$|\|) ')

102

_optionre = re.compile(r'^(-([a-zA-Z0-9]), )?(--[a-z0-9-]+)'

102

_optionre = re.compile(r'^(-([a-zA-Z0-9]), )?(--[a-z0-9-]+)'

103

r'((.*) +)(.*)$')

103

r'((.*) +)(.*)$')

104

_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')

104

_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')

105

_definitionre = re.compile(r'[^ ]')

105

_definitionre = re.compile(r'[^ ]')

106

107

def splitparagraphs(blocks):
    """Split paragraphs into lists.

    Every 'paragraph' block whose first line starts a list item is
    replaced, in place, by one block per item. The new blocks carry the
    list type ('bullet', 'option', 'field' or 'definition') and the
    indentation of the paragraph they came from.
    """
    # (list type, item regexp, single line items?) in the order they
    # are tried. Definition lists have the least specific regexp and
    # therefore have to be tried last.
    listkinds = [('bullet', _bulletre, True),
                 ('option', _optionre, True),
                 ('field', _fieldre, True),
                 ('definition', _definitionre, False)]

    def isitem(lines, n, itemre, singleline):
        """Does itemre match an item at line n?

        A list item can be followed by an indented line or another list
        item (but only if singleline is True).
        """
        if not itemre.match(lines[n]):
            return False
        if n + 1 < len(lines):
            following = lines[n + 1]
        else:
            following = ''
        if not singleline:
            return following.startswith(' ')
        return (following == '' or following[0] == ' '
                or itemre.match(following))

    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        if block['type'] == 'paragraph':
            lines = block['lines']
            for kind, itemre, singleline in listkinds:
                if not isitem(lines, 0, itemre, singleline):
                    continue
                # The first line starts an item, so items is never
                # empty when a continuation line gets appended.
                items = []
                for n, line in enumerate(lines):
                    if isitem(lines, n, itemre, singleline):
                        items.append(dict(type=kind, lines=[],
                                          indent=block['indent']))
                    items[-1]['lines'].append(line)
                blocks[pos:pos + 1] = items
                break
        pos += 1
    return blocks

148

149

150

_fieldwidth = 12

150

_fieldwidth = 12

151

152

def updatefieldlists(blocks):

152

def updatefieldlists(blocks):

153

"""Find key and maximum key width for field lists."""

153

"""Find key and maximum key width for field lists."""

154

i = 0

154

i = 0

155

while i < len(blocks):

155

while i < len(blocks):

156

if blocks[i]['type'] != 'field':

156

if blocks[i]['type'] != 'field':

157

i += 1

157

i += 1

158

continue

158

continue

159

160

keywidth = 0

160

keywidth = 0

161

j = i

161

j = i

162

while j < len(blocks) and blocks[j]['type'] == 'field':

162

while j < len(blocks) and blocks[j]['type'] == 'field':

163

m = _fieldre.match(blocks[j]['lines'][0])

163

m = _fieldre.match(blocks[j]['lines'][0])

164

key, rest = m.groups()

164

key, rest = m.groups()

165

blocks[j]['lines'][0] = rest

165

blocks[j]['lines'][0] = rest

166

blocks[j]['key'] = key

166

blocks[j]['key'] = key

167

keywidth = max(keywidth, len(key))

167

keywidth = max(keywidth, len(key))

168

j += 1

168

j += 1

169

170

for block in blocks[i:j]:

170

for block in blocks[i:j]:

171

block['keywidth'] = keywidth

171

block['keywidth'] = keywidth

172

i = j + 1

172

i = j + 1

173

174

return blocks

174

return blocks

175

176

177

def updateoptionlists(blocks):
    """Find option string and maximum option width for option lists.

    For every run of consecutive 'option' blocks, the first line of
    each block is reduced to its description, the normalized option
    string is stored under 'optstr', and the display width of the
    widest option string in the run is stored under 'optstrwidth' on
    every block of the run. The blocks are updated in place and
    returned.
    """
    start = 0
    while start < len(blocks):
        if blocks[start]['type'] != 'option':
            start += 1
            continue

        widest = 0
        end = start
        while end < len(blocks) and blocks[end]['type'] == 'option':
            item = blocks[end]
            m = _optionre.match(item['lines'][0])

            shortoption = m.group(2)
            longoption = m.group(3)[2:].strip()
            desc = m.group(6).strip()
            longoptionarg = m.group(5).strip()
            item['lines'][0] = desc

            if shortoption:
                prefix = "-%s " % shortoption
            else:
                # NOTE(review): this listing had runs of whitespace
                # collapsed; confirm the width of this padding.
                prefix = ' '

            # rstrip drops the separator left over when the option
            # takes no argument.
            opt = ("%s--%s %s" % (prefix, longoption, longoptionarg)).rstrip()
            item['optstr'] = opt
            widest = max(widest, encoding.colwidth(opt))
            end += 1

        for item in blocks[start:end]:
            item['optstrwidth'] = widest
        # blocks[end], if present, is known not to be an option.
        start = end + 1
    return blocks

212

213

def prunecontainers(blocks, keep):
    """Prune unwanted containers.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A container is a paragraph starting with ".. container:: type"
    followed by more deeply indented blocks. The directive block itself
    is always removed. If the container type is listed in keep, the
    contained blocks are kept and shifted to the indentation of the
    directive; otherwise they are removed as well.

    Returns a (blocks, pruned) tuple where blocks is the list that was
    passed in, modified in place, and pruned lists the types of the
    containers that were removed.
    """
    pruned = []
    i = 0
    while i + 1 < len(blocks):
        # Searching for a ".. container:: type" paragraph; the more
        # deeply indented blocks that follow are its content.
        if (blocks[i]['type'] == 'paragraph' and
            blocks[i]['lines'][0].startswith('.. container::')):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent
            containertype = blocks[i]['lines'][0][15:]
            prune = containertype not in keep
            if prune:
                pruned.append(containertype)

            # Always delete "..container:: type" block
            del blocks[i]
            j = i
            # Step back exactly once so that the "i += 1" below lands
            # on the block that took the directive's place. Stepping
            # back once per pruned block could drive the index negative
            # and make the scan continue from the end of the list, and
            # not stepping back at all would skip a nested container
            # that opens a kept container.
            i -= 1
            while j < len(blocks) and blocks[j]['indent'] > indent:
                if prune:
                    del blocks[j]
                else:
                    blocks[j]['indent'] -= adjustment
                    j += 1
        i += 1
    return blocks, pruned

250

251

252

_sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")

252

_sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")

253

254

def findsections(blocks):

254

def findsections(blocks):

255

"""Finds sections.

255

"""Finds sections.

256

257

The blocks must have a 'type' field, i.e., they should have been

257

The blocks must have a 'type' field, i.e., they should have been

258

run through findliteralblocks first.

258

run through findliteralblocks first.

259

"""

259

"""

260

for block in blocks:

260

for block in blocks:

261

# Searching for a block that looks like this:

261

# Searching for a block that looks like this:

262

#

262

#

263

# +------------------------------+

263

# +------------------------------+

264

# | Section title |

264

# | Section title |

265

# | ------------- |

265

# | ------------- |

266

# +------------------------------+

266

# +------------------------------+

267

if (block['type'] == 'paragraph' and

267

if (block['type'] == 'paragraph' and

268

len(block['lines']) == 2 and

268

len(block['lines']) == 2 and

269

encoding.colwidth(block['lines'][0]) == len(block['lines'][1]) and

269

encoding.colwidth(block['lines'][0]) == len(block['lines'][1]) and

270

_sectionre.match(block['lines'][1])):

270

_sectionre.match(block['lines'][1])):

271

block['underline'] = block['lines'][1][0]

271

block['underline'] = block['lines'][1][0]

272

block['type'] = 'section'

272

block['type'] = 'section'

273

del block['lines'][1]

273

del block['lines'][1]

274

return blocks

274

return blocks

275

276

277

def inlineliterals(blocks):
    """Turn ``inline literal`` markers into double quotes.

    Only the lines of 'paragraph' and 'section' blocks are rewritten;
    all other blocks are left alone. The blocks are updated in place
    and returned.
    """
    quoting = [('``', '"')]
    for block in blocks:
        if block['type'] not in ('paragraph', 'section'):
            continue
        block['lines'] = [replace(line, quoting) for line in block['lines']]
    return blocks

283

284

285

def hgrole(blocks):
    """Turn :hg:`command` into "hg command".

    Only the lines of 'paragraph' and 'section' blocks are rewritten.
    The blocks are updated in place and returned.
    """
    # The opening marker is replaced first and every remaining
    # back-quote afterwards. Handling the two halves separately also
    # covers commands that are broken across lines, but it relies on
    # the input having no stray back-quotes (run the blocks through
    # inlineliterals first).
    rolesubsts = [(':hg:`', '"hg '), ('`', '"')]
    for block in blocks:
        if block['type'] not in ('paragraph', 'section'):
            continue
        block['lines'] = [replace(line, rolesubsts)
                          for line in block['lines']]
    return blocks

295

296

297

def addmargins(blocks):
    """Adds empty blocks for vertical spacing.

    Consecutive blocks of the same type are kept together without
    vertical space when that type is 'bullet', 'option' or 'field'. An
    empty block of type 'margin' is inserted between all other pairs of
    adjacent blocks. The list is modified in place and returned.
    """
    compact = ('bullet', 'option', 'field')
    pos = 1
    while pos < len(blocks):
        kind = blocks[pos]['type']
        if not (kind == blocks[pos - 1]['type'] and kind in compact):
            blocks.insert(pos, dict(lines=[''], indent=0, type='margin'))
            # Step over the margin that was just inserted.
            pos += 1
        pos += 1
    return blocks

312

313

def prunecomments(blocks):
    """Remove comments.

    A comment is a 'paragraph' block whose first line starts with
    ".. " or which consists of the single line "..". A 'margin' block
    directly following a removed comment is removed with it. The list
    is modified in place and returned.
    """
    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        iscomment = (block['type'] == 'paragraph' and
                     (block['lines'][0].startswith('.. ') or
                      block['lines'] == ['..']))
        if not iscomment:
            pos += 1
            continue
        # Deleting shifts the next block into this position, so pos is
        # deliberately left unchanged here.
        del blocks[pos]
        if pos < len(blocks) and blocks[pos]['type'] == 'margin':
            del blocks[pos]
    return blocks

326

327

_admonitionre = re.compile(r"\.\. (admonition|attention|caution|danger|"

327

_admonitionre = re.compile(r"\.\. (admonition|attention|caution|danger|"

328

328

329

flags=re.IGNORECASE)

329

flags=re.IGNORECASE)

330

331

def findadmonitions(blocks):

331

def findadmonitions(blocks):

332

"""

332

"""

333

Makes the type of the block an admonition block if

333

Makes the type of the block an admonition block if

334

the first line is an admonition directive

334

the first line is an admonition directive

335

"""

335

"""

336

i = 0

336

i = 0

337

while i < len(blocks):

337

while i < len(blocks):

338

m = _admonitionre.match(blocks[i]['lines'][0])

338

m = _admonitionre.match(blocks[i]['lines'][0])

339

if m:

339

if m:

340

blocks[i]['type'] = 'admonition'

340

blocks[i]['type'] = 'admonition'

341

admonitiontitle = blocks[i]['lines'][0][3:m.end() - 2].lower()

341

admonitiontitle = blocks[i]['lines'][0][3:m.end() - 2].lower()

342

343

firstline = blocks[i]['lines'][0][m.end() + 1:]

343

firstline = blocks[i]['lines'][0][m.end() + 1:]

344

if firstline:

344

if firstline:

345

blocks[i]['lines'].insert(1, ' ' + firstline)

345

blocks[i]['lines'].insert(1, ' ' + firstline)

346

347

blocks[i]['admonitiontitle'] = admonitiontitle

347

blocks[i]['admonitiontitle'] = admonitiontitle

348

del blocks[i]['lines'][0]

348

del blocks[i]['lines'][0]

349

i = i + 1

349

i = i + 1

350

return blocks

350

return blocks

351

352

# Translatable headings for admonition blocks, keyed by the lower-cased
# directive name that findadmonitions stores under 'admonitiontitle'.
_admonitiontitles = {
    'attention': _('Attention:'),
    'caution': _('Caution:'),
    'danger': _('!Danger!'),
    'error': _('Error:'),
    'hint': _('Hint:'),
    'important': _('Important:'),
    'note': _('Note:'),
    'tip': _('Tip:'),
    'warning': _('Warning!'),
}

361

362

def formatoption(block, width):
    """Format an option list item according to width.

    The option string is padded to the common 'optstrwidth' of its
    list and used as the indent of the first line; the description is
    wrapped after it with a matching hanging indent.
    """
    desc = ' '.join(map(str.strip, block['lines']))
    optstr = block['optstr']
    padding = ' ' * (block['optstrwidth'] - encoding.colwidth(optstr))
    # NOTE(review): this listing had runs of whitespace collapsed;
    # confirm the width of the separator after the padding.
    initindent = '%s%s ' % (optstr, padding)
    hangindent = ' ' * (encoding.colwidth(initindent) + 1)
    # One column is given up for the space that is put in front of the
    # wrapped text.
    wrapped = util.wrap(desc, width - 1,
                        initindent=initindent,
                        hangindent=hangindent)
    return ' %s' % wrapped

372

373

def formatblock(block, width):
    """Format a block according to width.

    Returns the formatted text of a single block. A width of zero or
    less selects a width of 78. The first line of 'bullet' and 'field'
    blocks is rewritten in place as part of the formatting.
    """
    def leadingspaces(line):
        # Number of whitespace characters at the start of line.
        return len(line) - len(line.lstrip())

    if width <= 0:
        width = 78
    kind = block['type']
    indent = ' ' * block['indent']

    if kind == 'margin':
        return ''

    lines = block['lines']
    if kind == 'literal':
        # NOTE(review): this listing had runs of whitespace collapsed;
        # confirm the width of the extra literal indentation.
        indent += ' '
        return indent + ('\n' + indent).join(lines)
    if kind == 'section':
        title = lines[0]
        underline = encoding.colwidth(title) * block['underline']
        return "%s%s\n%s%s" % (indent, title, indent, underline)
    if kind == 'option':
        return formatoption(block, width)
    if kind == 'admonition':
        heading = indent + _admonitiontitles[block['admonitiontitle']]
        # The body is indented as far as its last line was.
        bodyindent = indent + leadingspaces(lines[-1]) * ' '
        body = ' '.join(map(str.strip, lines))
        return '%s\n%s' % (heading, util.wrap(body, width=width,
                                              initindent=bodyindent,
                                              hangindent=bodyindent))
    if kind == 'definition':
        term = indent + lines[0]
        bodyindent = indent + leadingspaces(lines[-1]) * ' '
        body = ' '.join(map(str.strip, lines[1:]))
        return '%s\n%s' % (term, util.wrap(body, width=width,
                                           initindent=bodyindent,
                                           hangindent=bodyindent))

    hangindent = indent
    if kind == 'bullet':
        if lines[0].startswith('| '):
            # Remove bullet for line blocks and add no extra
            # indentation.
            lines[0] = lines[0][2:]
        else:
            # Continuation lines are aligned with the item text.
            hangindent = indent + _bulletre.match(lines[0]).end() * ' '
    elif kind == 'field':
        keywidth = block['keywidth']
        key = block['key']
        if len(key) + 2 > _fieldwidth:
            # key too large, use full line width
            key = key.ljust(width)
            hangindent = indent + _fieldwidth * ' '
        elif keywidth + 2 < _fieldwidth:
            # all keys are small, add only two spaces
            key = key.ljust(keywidth + 2)
            hangindent = indent + (keywidth + 2) * ' '
        else:
            # mixed sizes, use fieldwidth for this one
            key = key.ljust(_fieldwidth)
            hangindent = indent + _fieldwidth * ' '
        lines[0] = key + lines[0]

    text = ' '.join(map(str.strip, lines))
    return util.wrap(text, width=width,
                     initindent=indent,
                     hangindent=hangindent)

435

436

def parse(text, indent=0, keep=None):
    """Parse text into a list of blocks

    indent is added to the indentation of every block found in text.
    keep lists the container types whose content is kept; it is passed
    to prunecontainers, with None treated as an empty list.

    Returns a (blocks, pruned) tuple as produced by prunecontainers,
    with the remaining parsing passes applied to blocks.
    """
    blocks = findblocks(text)
    for block in blocks:
        block['indent'] += indent
    blocks = findliteralblocks(blocks)
    blocks, pruned = prunecontainers(blocks, keep or [])
    # The remaining passes each take and return the block list; they
    # are applied in this order.
    passes = (findsections, inlineliterals, hgrole, splitparagraphs,
              updatefieldlists, updateoptionlists, addmargins,
              prunecomments, findadmonitions)
    for step in passes:
        blocks = step(blocks)
    return blocks, pruned

454

455

def formatblocks(blocks, width):
    """Format every block according to width and join them with newlines."""
    return '\n'.join([formatblock(block, width) for block in blocks])

458

459

def format(text, width, indent=0, keep=None):
    """Parse and format the text according to width.

    Returns the formatted text when keep is None. Otherwise returns a
    (text, pruned) tuple, where pruned is the list of pruned container
    types reported by parse.
    """
    blocks, pruned = parse(text, indent, keep or [])
    formatted = formatblocks(blocks, width)
    if keep is not None:
        return formatted, pruned
    return formatted

467

468

def getsections(blocks):
    '''return a list of (section name, nesting level, blocks) tuples

    The nesting level of a section follows from its underline
    character: the first character seen is level 1, the next new
    character below it is level 2, and so on. Blocks that come before
    the first section are collected in an initial section with an empty
    name and level 0.
    '''
    # Underline characters of the currently open sections, outermost
    # first.
    openchars = ""
    sections = []
    for block in blocks:
        if block['type'] != 'section':
            if not sections:
                # add an initial empty section
                sections.append(('', 0, []))
            sections[-1][2].append(block)
            continue

        char = block['underline']
        if char not in openchars:
            openchars += char
        depth = openchars.index(char) + 1
        # Returning to an outer level closes all deeper sections.
        openchars = openchars[:depth]
        sections.append((block['lines'][0], depth, [block]))
    return sections

468

487

469

if __name__ == "__main__":
    from pprint import pprint

    def debug(func, *args):
        """Run one parsing pass, dump its result and return it."""
        result = func(*args)
        print("*** after %s:" % func.__name__)
        pprint(result)
        print('')
        return result

    # Read a document from stdin and show the block list after each
    # pass; the command line arguments name the containers to keep.
    text = sys.stdin.read()
    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    blocks, pruned = debug(prunecontainers, blocks, sys.argv[1:])
    for step in (inlineliterals, splitparagraphs, updatefieldlists,
                 updateoptionlists, findsections, addmargins,
                 prunecomments, findadmonitions):
        blocks = debug(step, blocks)
    print('\n'.join(formatblock(b, 30) for b in blocks))

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # minirst.py - minimal reStructuredText parser
             #
             # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """simplified reStructuredText parser.
             This parser knows just enough about reStructuredText to parse the
             Mercurial docstrings.
             It cheats in a major way: nested blocks are not really nested. They
             are just indented blocks that look like they are nested. This relies
             on the user to keep the right indentation for the blocks.
             Remember to update http://mercurial.selenic.com/wiki/HelpStyleGuide
             when adding support for new constructs.
             """
             import re, sys
             import util, encoding
             from i18n import _
             def replace(text, substs):
                 """Apply a sequence of (old, new) substitutions to text.

                 text is decoded using encoding.encoding, every pair in substs is
                 applied in order with unicode.replace, and the result is encoded
                 back with the same encoding.
                 """
                 enc = encoding.encoding
                 decoded = text.decode(enc)
                 for old, new in substs:
                     decoded = decoded.replace(old, new)
                 return decoded.encode(enc)
             _blockre = re.compile(r"\n(?:\s*\n)+")
             def findblocks(text):
                 """Find continuous blocks of lines in text.

                 Blocks are separated by one or more blank (whitespace-only) lines.

                 Returns a list of dictionaries representing the blocks. Each block
                 has an 'indent' field (the width of the leading whitespace shared
                 by all of its lines) and a 'lines' field (the lines with that
                 common indentation removed). Text without any content yields an
                 empty list.
                 """
                 blocks = []
                 for b in _blockre.split(text.strip()):
                     lines = b.splitlines()
                     if not lines:
                         # Empty or whitespace-only input splits into one empty
                         # chunk; min() below would raise ValueError on it.
                         continue
                     indent = min((len(l) - len(l.lstrip())) for l in lines)
                     lines = [l[indent:] for l in lines]
                     blocks.append(dict(indent=indent, lines=lines))
                 return blocks
             def findliteralblocks(blocks):
                 """Finds literal blocks and adds a 'type' field to the blocks.

                 Literal blocks are given the type 'literal', all other blocks are
                 given the type 'paragraph'.

                 The list is modified in place: blocks consisting only of "::" are
                 removed, the trailing "::" marker of an introducing paragraph is
                 trimmed, and the indent of each literal block is reduced by the
                 computed adjustment. The same list is returned.
                 """
                 i = 0
                 while i < len(blocks):
                     # Searching for a block that looks like this:
                     #
                     # +------------------------------+
                     # | paragraph                    |
                     # | (ends with "::")             |
                     # +------------------------------+
                     #    +---------------------------+
                     #    | indented literal block    |
                     #    +---------------------------+
                     blocks[i]['type'] = 'paragraph'
                     if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):
                         indent = blocks[i]['indent']
                         adjustment = blocks[i + 1]['indent'] - indent
                         if blocks[i]['lines'] == ['::']:
                             # Expanded form: remove block
                             del blocks[i]
                             # Step back so that blocks[i + 1] below is the block
                             # that followed the removed "::" marker.
                             # NOTE(review): when the marker was the first block, i
                             # becomes -1 and the bullet check below inspects the
                             # last block of the list -- confirm this is harmless.
                             i -= 1
                         elif blocks[i]['lines'][-1].endswith(' ::'):
                             # Partially minimized form: remove space and both
                             # colons.
                             blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
                         else:
                             # Fully minimized form: remove just one colon.
                             blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]
                         # List items are formatted with a hanging indent. We must
                         # correct for this here while we still have the original
                         # information on the indentation of the subsequent literal
                         # blocks available.
                         m = _bulletre.match(blocks[i]['lines'][0])
                         if m:
                             indent += m.end()
                             adjustment -= m.end()
                         # Mark the following indented blocks.
                         while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                             blocks[i + 1]['type'] = 'literal'
                             blocks[i + 1]['indent'] -= adjustment
                             i += 1
                     i += 1
                 return blocks
             _bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')
             _optionre = re.compile(r'^(-([a-zA-Z0-9]), )?(--[a-z0-9-]+)'
                                    r'((.*)  +)(.*)$')
             _fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')
             _definitionre = re.compile(r'[^ ]')
             def splitparagraphs(blocks):
                 """Split paragraphs that are lists into one block per list item.

                 Each 'paragraph' block whose first line starts a bullet, option,
                 field or definition list item is replaced in place by a run of
                 blocks of that list type, one per item. Every item block carries
                 the paragraph's indent and the lines belonging to the item. Other
                 blocks are left alone. The (modified) list is returned.
                 """
                 # (list type, item regexp, single line items?). The order is
                 # significant: the definition list regexp is the least specific
                 # one and therefore has to be tried last.
                 listtypes = (('bullet', _bulletre, True),
                              ('option', _optionre, True),
                              ('field', _fieldre, True),
                              ('definition', _definitionre, False))
                 def startsitem(lines, pos, itemre, singleline):
                     """Does itemre match an item at line pos?

                     A list item can be followed by an indented line or another
                     list item (but only if singleline is True).
                     """
                     if not itemre.match(lines[pos]):
                         return False
                     following = ''
                     if pos + 1 < len(lines):
                         following = lines[pos + 1]
                     if not singleline:
                         return following.startswith(' ')
                     return (following == '' or following[0] == ' '
                             or itemre.match(following))
                 idx = 0
                 while idx < len(blocks):
                     block = blocks[idx]
                     if block['type'] == 'paragraph':
                         lines = block['lines']
                         for kind, itemre, singleline in listtypes:
                             if not startsitem(lines, 0, itemre, singleline):
                                 continue
                             items = []
                             for pos, line in enumerate(lines):
                                 if startsitem(lines, pos, itemre, singleline):
                                     # A new item begins on this line.
                                     items.append(dict(type=kind, lines=[],
                                                       indent=block['indent']))
                                 items[-1]['lines'].append(line)
                             blocks[idx:idx + 1] = items
                             break
                     idx += 1
                 return blocks
             _fieldwidth = 12
             def updatefieldlists(blocks):
                 """Find key and maximum key width for field lists.

                 For every run of consecutive 'field' blocks, the key is split off
                 the first line of each block and stored under 'key' (the first
                 line keeps only the text after the key), and the length of the
                 longest key in the run is stored under 'keywidth' on each block
                 of the run. Blocks are updated in place and the list is returned.
                 """
                 total = len(blocks)
                 start = 0
                 while start < total:
                     if blocks[start]['type'] != 'field':
                         start += 1
                         continue
                     # Walk to the end of this run of field blocks.
                     widest = 0
                     stop = start
                     while stop < total and blocks[stop]['type'] == 'field':
                         field = blocks[stop]
                         key, rest = _fieldre.match(field['lines'][0]).groups()
                         field['lines'][0] = rest
                         field['key'] = key
                         if len(key) > widest:
                             widest = len(key)
                         stop += 1
                     for field in blocks[start:stop]:
                         field['keywidth'] = widest
                     # blocks[stop] (if any) is known not to be a field block.
                     start = stop + 1
                 return blocks
             def updateoptionlists(blocks):
                 """Find option strings and maximum option width for option lists.

                 For every run of consecutive 'option' blocks, the first line of
                 each block is reduced to its description and the normalized
                 option string ("-s --long ARG", or "   --long ARG" when there is
                 no short option) is stored under 'optstr'. The largest column
                 width of the option strings in the run is stored under
                 'optstrwidth' on each block of the run. Blocks are updated in
                 place and the list is returned.
                 """
                 start = 0
                 while start < len(blocks):
                     if blocks[start]['type'] != 'option':
                         start += 1
                         continue
                     widest = 0
                     stop = start
                     while stop < len(blocks) and blocks[stop]['type'] == 'option':
                         option = blocks[stop]
                         m = _optionre.match(option['lines'][0])
                         shortoption = m.group(2)
                         longoption = m.group(3)[2:].strip()
                         desc = m.group(6).strip()
                         longoptionarg = m.group(5).strip()
                         option['lines'][0] = desc
                         if shortoption:
                             prefix = "-%s " % shortoption
                         else:
                             # Pad so that long options line up with those that
                             # do have a short form.
                             prefix = '   '
                         optstr = "%s--%s %s" % (prefix, longoption, longoptionarg)
                         optstr = optstr.rstrip()
                         option['optstr'] = optstr
                         widest = max(widest, encoding.colwidth(optstr))
                         stop += 1
                     for option in blocks[start:stop]:
                         option['optstrwidth'] = widest
                     # blocks[stop] (if any) is known not to be an option block.
                     start = stop + 1
                 return blocks
             def prunecontainers(blocks, keep):
                 """Prune unwanted containers.

                 The blocks must have a 'type' field, i.e., they should have been
                 run through findliteralblocks first.

                 Every ".. container:: TYPE" marker block that is followed by at
                 least one more block is removed. The more-indented blocks that
                 follow it are deleted when TYPE is not in keep; otherwise they
                 are kept and shifted left so that the first of them takes over
                 the marker's indentation.

                 Returns a (blocks, pruned) tuple where blocks is the same,
                 modified list and pruned lists the container types whose contents
                 were deleted.
                 """
                 pruned = []
                 i = 0
                 while i + 1 < len(blocks):
                     # Searching for a block that looks like this:
                     #
                     # +-------+---------------------------+
                     # | ".. container ::" type            |
                     # +---+                               |
                     #     | blocks                        |
                     #     +-------------------------------+
                     if (blocks[i]['type'] == 'paragraph' and
                         blocks[i]['lines'][0].startswith('.. container::')):
                         indent = blocks[i]['indent']
                         adjustment = blocks[i + 1]['indent'] - indent
                         containertype = blocks[i]['lines'][0][15:]
                         prune = containertype not in keep
                         if prune:
                             pruned.append(containertype)
                         # Always delete "..container:: type" block
                         del blocks[i]
                         j = i
                         # Exactly one block (the marker) was removed at the
                         # outer index, so step back once. The block that now
                         # sits at position i is then examined by the next
                         # iteration, whether it is a kept child (possibly a
                         # nested container) or the block after pruned children.
                         i -= 1
                         while j < len(blocks) and blocks[j]['indent'] > indent:
                             if prune:
                                 del blocks[j]
                             else:
                                 blocks[j]['indent'] -= adjustment
                                 j += 1
                     i += 1
                 return blocks, pruned
             _sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")
             def findsections(blocks):
                 """Finds sections.

                 The blocks must have a 'type' field, i.e., they should have been
                 run through findliteralblocks first.

                 A two-line paragraph whose second line is a run of one repeated
                 punctuation character as wide as the title becomes a 'section'
                 block: the underline character is stored under 'underline' and
                 the underline row is dropped from 'lines'.
                 """
                 for block in blocks:
                     # Searching for a block that looks like this:
                     #
                     # +------------------------------+
                     # | Section title                |
                     # | -------------                |
                     # +------------------------------+
                     if block['type'] != 'paragraph':
                         continue
                     lines = block['lines']
                     if len(lines) != 2:
                         continue
                     title, underline = lines
                     if encoding.colwidth(title) != len(underline):
                         continue
                     if not _sectionre.match(underline):
                         continue
                     block['underline'] = underline[0]
                     block['type'] = 'section'
                     del lines[1]
                 return blocks
             def inlineliterals(blocks):
                 """Turn ``inline literal`` markup into "inline literal".

                 Only the lines of 'paragraph' and 'section' blocks are rewritten.
                 """
                 quoting = [('``', '"')]
                 for block in blocks:
                     if block['type'] not in ('paragraph', 'section'):
                         continue
                     block['lines'] = [replace(line, quoting)
                                       for line in block['lines']]
                 return blocks
             def hgrole(blocks):
                 """Turn :hg:`command` into "hg command".

                 Only the lines of 'paragraph' and 'section' blocks are rewritten.
                 """
                 # The second substitution closes the quote. Because each line is
                 # handled on its own this also works when there is a line break
                 # in the command, but it relies on the fact that we have no stray
                 # back-quotes in the input (run the blocks through inlineliterals
                 # first).
                 rolesubsts = [(':hg:`', '"hg '), ('`', '"')]
                 for block in blocks:
                     if block['type'] not in ('paragraph', 'section'):
                         continue
                     block['lines'] = [replace(line, rolesubsts)
                                       for line in block['lines']]
                 return blocks
             def addmargins(blocks):
                 """Adds empty blocks for vertical spacing.

                 Adjacent blocks that share the same type and are bullets, options
                 or fields are kept together with no vertical space between them;
                 an empty 'margin' block is inserted between all other neighbours.
                 The list is modified in place and returned.
                 """
                 grouped = ('bullet', 'option', 'field')
                 pos = 1
                 while pos < len(blocks):
                     kind = blocks[pos]['type']
                     if kind == blocks[pos - 1]['type'] and kind in grouped:
                         pos += 1
                         continue
                     blocks.insert(pos, dict(lines=[''], indent=0, type='margin'))
                     # Skip over the margin and the block it was put in front of.
                     pos += 2
                 return blocks
             def prunecomments(blocks):
                 """Remove comments.

                 A comment is a 'paragraph' block whose first line starts with
                 ".. " or which consists of the single line "..". A 'margin' block
                 directly following a removed comment is removed as well. The list
                 is modified in place and returned.
                 """
                 pos = 0
                 while pos < len(blocks):
                     block = blocks[pos]
                     iscomment = False
                     if block['type'] == 'paragraph':
                         lines = block['lines']
                         iscomment = lines[0].startswith('.. ') or lines == ['..']
                     if not iscomment:
                         pos += 1
                         continue
                     del blocks[pos]
                     # Drop the spacing that belonged to the comment, too.
                     if pos < len(blocks) and blocks[pos]['type'] == 'margin':
                         del blocks[pos]
                 return blocks
             _admonitionre = re.compile(r"\.\. (admonition|attention|caution|danger|"
                                        r"error|hint|important|note|tip|warning)::",
                                        flags=re.IGNORECASE)
             def findadmonitions(blocks):
                 """Turn blocks that start with an admonition directive into
                 'admonition' blocks.

                 The lower-cased directive name is stored under 'admonitiontitle'
                 and the directive line is removed from 'lines'. Text that followed
                 the directive on the same line is kept as a new first line,
                 indented by three spaces. Blocks are updated in place and the
                 list is returned.
                 """
                 for block in blocks:
                     lines = block['lines']
                     directive = lines[0]
                     m = _admonitionre.match(directive)
                     if not m:
                         continue
                     block['type'] = 'admonition'
                     block['admonitiontitle'] = m.group(1).lower()
                     # Skip the "::" and the separator character after it.
                     inlinetext = directive[m.end() + 1:]
                     if inlinetext:
                         lines.insert(1, '   ' + inlinetext)
                     del lines[0]
                 return blocks
             # Translated headings for admonition blocks, keyed by the lower-cased
             # directive name that findadmonitions stores in 'admonitiontitle';
             # formatblock looks the heading up here.
             # NOTE(review): _admonitionre also accepts the generic "admonition"
             # directive, which has no entry in this table -- confirm whether a
             # lookup for it can happen.
             _admonitiontitles = {'attention': _('Attention:'),
                                  'caution': _('Caution:'),
                                  'danger': _('!Danger!')  ,
                                  'error': _('Error:'),
                                  'hint': _('Hint:'),
                                  'important': _('Important:'),
                                  'note': _('Note:'),
                                  'tip': _('Tip:'),
                                  'warning': _('Warning!')}
             def formatoption(block, width):
                 """Format an option block according to width.

                 The option string is padded to the 'optstrwidth' of its list and
                 followed by two spaces; the description is wrapped next to it
                 with continuation lines hanging under the description column.
                 The result is prefixed with a single space.
                 """
                 desc = ' '.join(map(str.strip, block['lines']))
                 optstr = block['optstr']
                 used = encoding.colwidth(optstr)
                 # One column is taken by the leading space added on return.
                 usablewidth = width - 1
                 padding = ' ' * (block['optstrwidth'] - used)
                 initindent = '%s%s  ' % (optstr, padding)
                 hangindent = ' ' * (encoding.colwidth(initindent) + 1)
                 wrapped = util.wrap(desc, usablewidth,
                                     initindent=initindent,
                                     hangindent=hangindent)
                 return ' %s' % wrapped
             def formatblock(block, width):
                 """Format a block according to width.

                 block is a dictionary with at least 'type', 'indent' and 'lines'
                 fields; some types need further fields ('admonitiontitle',
                 'underline', 'key'/'keywidth', 'optstr'/'optstrwidth'). A width
                 that is zero or negative is replaced by 78. Returns the formatted
                 text of the block as a string.

                 Note that for 'bullet' line blocks and for 'field' blocks the
                 first entry of block['lines'] is modified in place.
                 """
                 if width <= 0:
                     width = 78
                 indent = ' ' * block['indent']
                 if block['type'] == 'admonition':
                     # Translated heading on its own line, then the wrapped text.
                     admonition = _admonitiontitles[block['admonitiontitle']]
                     # Indent the text like the last line of the block was.
                     hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())
                     defindent = indent + hang * ' '
                     text = ' '.join(map(str.strip, block['lines']))
                     return '%s\n%s' % (indent + admonition, util.wrap(text, width=width,
                                                        initindent=defindent,
                                                        hangindent=defindent))
                 if block['type'] == 'margin':
                     return ''
                 if block['type'] == 'literal':
                     # Literal text is not wrapped, only indented two extra columns.
                     indent += '  '
                     return indent + ('\n' + indent).join(block['lines'])
                 if block['type'] == 'section':
                     # Regenerate the underline with the column width of the title.
                     underline = encoding.colwidth(block['lines'][0]) * block['underline']
                     return "%s%s\n%s%s" % (indent, block['lines'][0],indent, underline)
                 if block['type'] == 'definition':
                     # The term goes on its own line; the definition is wrapped
                     # using the indentation of the last line of the block.
                     term = indent + block['lines'][0]
                     hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())
                     defindent = indent + hang * ' '
                     text = ' '.join(map(str.strip, block['lines'][1:]))
                     return '%s\n%s' % (term, util.wrap(text, width=width,
                                                        initindent=defindent,
                                                        hangindent=defindent))
                 # Remaining types are wrapped as one paragraph; subindent is the
                 # indentation of the continuation lines.
                 subindent = indent
                 if block['type'] == 'bullet':
                     if block['lines'][0].startswith('| '):
                         # Remove bullet for line blocks and add no extra
                         # indentation.
                         block['lines'][0] = block['lines'][0][2:]
                     else:
                         # Continuation lines hang under the text after the bullet.
                         m = _bulletre.match(block['lines'][0])
                         subindent = indent + m.end() * ' '
                 elif block['type'] == 'field':
                     keywidth = block['keywidth']
                     key = block['key']
                     subindent = indent + _fieldwidth * ' '
                     if len(key) + 2 > _fieldwidth:
                         # key too large, use full line width
                         key = key.ljust(width)
                     elif keywidth + 2 < _fieldwidth:
                         # all keys are small, add only two spaces
                         key = key.ljust(keywidth + 2)
                         subindent = indent + (keywidth + 2) * ' '
                     else:
                         # mixed sizes, use fieldwidth for this one
                         key = key.ljust(_fieldwidth)
                     block['lines'][0] = key + block['lines'][0]
                 elif block['type'] == 'option':
                     return formatoption(block, width)
                 text = ' '.join(map(str.strip, block['lines']))
                 return util.wrap(text, width=width,
                                  initindent=indent,
                                  hangindent=subindent)
             def parse(text, indent=0, keep=None):
                 """Parse text into a list of blocks.

                 indent is added to the indent of every block found in text before
                 the remaining passes run. keep is the list of container types
                 handed to prunecontainers (an empty list when not given).

                 Returns a (blocks, pruned) tuple, pruned being the list of
                 container types reported by prunecontainers.
                 """
                 blocks = findblocks(text)
                 for blk in blocks:
                     blk['indent'] += indent
                 blocks = findliteralblocks(blocks)
                 blocks, pruned = prunecontainers(blocks, keep or [])
                 # Each remaining pass maps a block list to a block list; they
                 # are applied in exactly this order.
                 laterpasses = (findsections, inlineliterals, hgrole,
                                splitparagraphs, updatefieldlists,
                                updateoptionlists, addmargins, prunecomments,
                                findadmonitions)
                 for step in laterpasses:
                     blocks = step(blocks)
                 return blocks, pruned
             def formatblocks(blocks, width):
                 """Format each block according to width and join the results with
                 newlines."""
                 rendered = [formatblock(block, width) for block in blocks]
                 return '\n'.join(rendered)
             def format(text, width, indent=0, keep=None):
                 """Parse and format the text according to width.

                 Returns the formatted text. When keep is given (anything but
                 None), a (text, pruned) tuple is returned instead, pruned being
                 the second value returned by parse.
                 """
                 blocks, pruned = parse(text, indent, keep or [])
                 formatted = formatblocks(blocks, width)
                 if keep is not None:
                     return formatted, pruned
                 return formatted
+            def getsections(blocks):
+                '''return a list of (section name, nesting level, blocks) tuples'''
+                nest = ""
+                level = 0
+                secs = []
+                for b in blocks:
+                    if b['type'] == 'section':
+                        i = b['underline']
+                        if i not in nest:
+                            nest += i
+                        level = nest.index(i) + 1
+                        nest = nest[:level]
+                        secs.append((b['lines'][0], level, [b]))
+                    else:
+                        if not secs:
+                            # add an initial empty section
+                            secs = [('', 0, [])]
+                        secs[-1][2].append(b)
+                return secs
             # Debugging aid: when run as a script, read reStructuredText from
             # stdin, run the passes one at a time while dumping the block list
             # after each of them, and finally print the result formatted to a
             # width of 30 columns. Command line arguments are the container
             # types to keep.
             # NOTE(review): unlike parse(), hgrole is not run here and
             # findsections runs after updateoptionlists -- confirm whether the
             # difference is intended.
             if __name__ == "__main__":
                 from pprint import pprint
                 def debug(func, *args):
                     # Call func, pretty-print what it returned and pass it on.
                     blocks = func(*args)
                     print "*** after %s:" % func.__name__
                     pprint(blocks)
                     print
                     return blocks
                 text = sys.stdin.read()
                 blocks = debug(findblocks, text)
                 blocks = debug(findliteralblocks, blocks)
                 blocks, pruned = debug(prunecontainers, blocks, sys.argv[1:])
                 blocks = debug(inlineliterals, blocks)
                 blocks = debug(splitparagraphs, blocks)
                 blocks = debug(updatefieldlists, blocks)
                 blocks = debug(updateoptionlists, blocks)
                 blocks = debug(findsections, blocks)
                 blocks = debug(addmargins, blocks)
                 blocks = debug(prunecomments, blocks)
                 blocks = debug(findadmonitions, blocks)
                 print '\n'.join(formatblock(b, 30) for b in blocks)