upstream/mercurial-mirror Commit - r12651:17f28de1

1

# minirst.py - minimal reStructuredText parser

1

# minirst.py - minimal reStructuredText parser

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

"""simplified reStructuredText parser.

8

"""simplified reStructuredText parser.

9

10

This parser knows just enough about reStructuredText to parse the

10

This parser knows just enough about reStructuredText to parse the

11

Mercurial docstrings.

11

Mercurial docstrings.

12

13

It cheats in a major way: nested blocks are not really nested. They

13

It cheats in a major way: nested blocks are not really nested. They

14

are just indented blocks that look like they are nested. This relies

14

are just indented blocks that look like they are nested. This relies

15

on the user to keep the right indentation for the blocks.

15

on the user to keep the right indentation for the blocks.

16

17

It only supports a small subset of reStructuredText:

17

It only supports a small subset of reStructuredText:

18

19

- sections

19

- sections

20

21

- paragraphs

21

- paragraphs

22

23

- literal blocks

23

- literal blocks

24

25

- definition lists

25

- definition lists

26

27

- specific admonitions

27

- specific admonitions

28

29

- bullet lists (items must start with '-')

29

- bullet lists (items must start with '-')

30

31

- enumerated lists (no autonumbering)

31

- enumerated lists (no autonumbering)

32

33

- field lists (colons cannot be escaped)

33

- field lists (colons cannot be escaped)

34

35

- option lists (supports only long options without arguments)

35

- option lists (supports only long options without arguments)

36

37

- inline literals (no other inline markup is not recognized)

37

- inline literals (no other inline markup is not recognized)

38

"""

38

"""

39

40

import re, sys

40

import re, sys

41

import util, encoding

41

import util, encoding

42

from i18n import _

42

from i18n import _

43

44

45

def replace(text, substs):

45

def replace(text, substs):

46

utext = text.decode(encoding.encoding)

46

utext = text.decode(encoding.encoding)

47

for f, t in substs:

47

for f, t in substs:

48

utext = utext.replace(f, t)

48

utext = utext.replace(f, t)

49

return utext.encode(encoding.encoding)

49

return utext.encode(encoding.encoding)

50

51

52

_blockre = re.compile(r"\n(?:\s*\n)+")

53

51

def findblocks(text):

54

def findblocks(text):

52

"""Find continuous blocks of lines in text.

55

"""Find continuous blocks of lines in text.

53

56

54

Returns a list of dictionaries representing the blocks. Each block

57

Returns a list of dictionaries representing the blocks. Each block

55

has an 'indent' field and a 'lines' field.

58

has an 'indent' field and a 'lines' field.

56

"""

59

"""

57

blocks = [[]]

60

blocks = []

58

lines = text.splitlines()

61

for b in _blockre.split(text.strip()):

59

for line in lines:

62

lines = b.splitlines()

60

if line.strip():

63

indent = min((len(l) - len(l.lstrip())) for l in lines)

61

blocks[-1].append(line)

64

lines = [l[indent:] for l in lines]

62

elif blocks[-1]:

65

blocks.append(dict(indent=indent, lines=lines))

63

blocks.append([])

64

if not blocks[-1]:

65

del blocks[-1]

66

67

for i, block in enumerate(blocks):

68

indent = min((len(l) - len(l.lstrip())) for l in block)

69

blocks[i] = dict(indent=indent, lines=[l[indent:] for l in block])

70

return blocks

66

return blocks

71

67

72

68

73

def findliteralblocks(blocks):

69

def findliteralblocks(blocks):

74

"""Finds literal blocks and adds a 'type' field to the blocks.

70

"""Finds literal blocks and adds a 'type' field to the blocks.

75

71

76

Literal blocks are given the type 'literal', all other blocks are

72

Literal blocks are given the type 'literal', all other blocks are

77

given type the 'paragraph'.

73

given type the 'paragraph'.

78

"""

74

"""

79

i = 0

75

i = 0

80

while i < len(blocks):

76

while i < len(blocks):

81

# Searching for a block that looks like this:

77

# Searching for a block that looks like this:

82

#

78

#

83

# +------------------------------+

79

# +------------------------------+

84

# | paragraph |

80

# | paragraph |

85

# | (ends with "::") |

81

# | (ends with "::") |

86

# +------------------------------+

82

# +------------------------------+

87

# +---------------------------+

83

# +---------------------------+

88

# | indented literal block |

84

# | indented literal block |

89

# +---------------------------+

85

# +---------------------------+

90

blocks[i]['type'] = 'paragraph'

86

blocks[i]['type'] = 'paragraph'

91

if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):

87

if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):

92

indent = blocks[i]['indent']

88

indent = blocks[i]['indent']

93

adjustment = blocks[i + 1]['indent'] - indent

89

adjustment = blocks[i + 1]['indent'] - indent

94

90

95

if blocks[i]['lines'] == ['::']:

91

if blocks[i]['lines'] == ['::']:

96

# Expanded form: remove block

92

# Expanded form: remove block

97

del blocks[i]

93

del blocks[i]

98

i -= 1

94

i -= 1

99

elif blocks[i]['lines'][-1].endswith(' ::'):

95

elif blocks[i]['lines'][-1].endswith(' ::'):

100

# Partially minimized form: remove space and both

96

# Partially minimized form: remove space and both

101

# colons.

97

# colons.

102

blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]

98

blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]

103

else:

99

else:

104

# Fully minimized form: remove just one colon.

100

# Fully minimized form: remove just one colon.

105

blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

101

blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

106

102

107

# List items are formatted with a hanging indent. We must

103

# List items are formatted with a hanging indent. We must

108

# correct for this here while we still have the original

104

# correct for this here while we still have the original

109

# information on the indentation of the subsequent literal

105

# information on the indentation of the subsequent literal

110

# blocks available.

106

# blocks available.

111

m = _bulletre.match(blocks[i]['lines'][0])

107

m = _bulletre.match(blocks[i]['lines'][0])

112

if m:

108

if m:

113

indent += m.end()

109

indent += m.end()

114

adjustment -= m.end()

110

adjustment -= m.end()

115

111

116

# Mark the following indented blocks.

112

# Mark the following indented blocks.

117

while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:

113

while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:

118

blocks[i + 1]['type'] = 'literal'

114

blocks[i + 1]['type'] = 'literal'

119

blocks[i + 1]['indent'] -= adjustment

115

blocks[i + 1]['indent'] -= adjustment

120

i += 1

116

i += 1

121

i += 1

117

i += 1

122

return blocks

118

return blocks

123

119

124

_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|$?[0-9A-Za-z]+$|\|) ')

120

_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|$?[0-9A-Za-z]+$|\|) ')

125

_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')

121

_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')

126

_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')

122

_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')

127

_definitionre = re.compile(r'[^ ]')

123

_definitionre = re.compile(r'[^ ]')

128

124

129

def splitparagraphs(blocks):

125

def splitparagraphs(blocks):

130

"""Split paragraphs into lists."""

126

"""Split paragraphs into lists."""

131

# Tuples with (list type, item regexp, single line items?). Order

127

# Tuples with (list type, item regexp, single line items?). Order

132

# matters: definition lists has the least specific regexp and must

128

# matters: definition lists has the least specific regexp and must

133

# come last.

129

# come last.

134

listtypes = [('bullet', _bulletre, True),

130

listtypes = [('bullet', _bulletre, True),

135

('option', _optionre, True),

131

('option', _optionre, True),

136

('field', _fieldre, True),

132

('field', _fieldre, True),

137

('definition', _definitionre, False)]

133

('definition', _definitionre, False)]

138

134

139

def match(lines, i, itemre, singleline):

135

def match(lines, i, itemre, singleline):

140

"""Does itemre match an item at line i?

136

"""Does itemre match an item at line i?

141

137

142

A list item can be followed by an idented line or another list

138

A list item can be followed by an idented line or another list

143

item (but only if singleline is True).

139

item (but only if singleline is True).

144

"""

140

"""

145

line1 = lines[i]

141

line1 = lines[i]

146

line2 = i + 1 < len(lines) and lines[i + 1] or ''

142

line2 = i + 1 < len(lines) and lines[i + 1] or ''

147

if not itemre.match(line1):

143

if not itemre.match(line1):

148

return False

144

return False

149

if singleline:

145

if singleline:

150

return line2 == '' or line2[0] == ' ' or itemre.match(line2)

146

return line2 == '' or line2[0] == ' ' or itemre.match(line2)

151

else:

147

else:

152

return line2.startswith(' ')

148

return line2.startswith(' ')

153

149

154

i = 0

150

i = 0

155

while i < len(blocks):

151

while i < len(blocks):

156

if blocks[i]['type'] == 'paragraph':

152

if blocks[i]['type'] == 'paragraph':

157

lines = blocks[i]['lines']

153

lines = blocks[i]['lines']

158

for type, itemre, singleline in listtypes:

154

for type, itemre, singleline in listtypes:

159

if match(lines, 0, itemre, singleline):

155

if match(lines, 0, itemre, singleline):

160

items = []

156

items = []

161

for j, line in enumerate(lines):

157

for j, line in enumerate(lines):

162

if match(lines, j, itemre, singleline):

158

if match(lines, j, itemre, singleline):

163

items.append(dict(type=type, lines=[],

159

items.append(dict(type=type, lines=[],

164

indent=blocks[i]['indent']))

160

indent=blocks[i]['indent']))

165

items[-1]['lines'].append(line)

161

items[-1]['lines'].append(line)

166

blocks[i:i + 1] = items

162

blocks[i:i + 1] = items

167

break

163

break

168

i += 1

164

i += 1

169

return blocks

165

return blocks

170

166

171

167

172

_fieldwidth = 12

168

_fieldwidth = 12

173

169

174

def updatefieldlists(blocks):

170

def updatefieldlists(blocks):

175

"""Find key and maximum key width for field lists."""

171

"""Find key and maximum key width for field lists."""

176

i = 0

172

i = 0

177

while i < len(blocks):

173

while i < len(blocks):

178

if blocks[i]['type'] != 'field':

174

if blocks[i]['type'] != 'field':

179

i += 1

175

i += 1

180

continue

176

continue

181

177

182

keywidth = 0

178

keywidth = 0

183

j = i

179

j = i

184

while j < len(blocks) and blocks[j]['type'] == 'field':

180

while j < len(blocks) and blocks[j]['type'] == 'field':

185

m = _fieldre.match(blocks[j]['lines'][0])

181

m = _fieldre.match(blocks[j]['lines'][0])

186

key, rest = m.groups()

182

key, rest = m.groups()

187

blocks[j]['lines'][0] = rest

183

blocks[j]['lines'][0] = rest

188

blocks[j]['key'] = key

184

blocks[j]['key'] = key

189

keywidth = max(keywidth, len(key))

185

keywidth = max(keywidth, len(key))

190

j += 1

186

j += 1

191

187

192

for block in blocks[i:j]:

188

for block in blocks[i:j]:

193

block['keywidth'] = keywidth

189

block['keywidth'] = keywidth

194

i = j + 1

190

i = j + 1

195

191

196

return blocks

192

return blocks

197

193

198

194

199

def prunecontainers(blocks, keep):

195

def prunecontainers(blocks, keep):

200

"""Prune unwanted containers.

196

"""Prune unwanted containers.

201

197

202

The blocks must have a 'type' field, i.e., they should have been

198

The blocks must have a 'type' field, i.e., they should have been

203

run through findliteralblocks first.

199

run through findliteralblocks first.

204

"""

200

"""

205

pruned = []

201

pruned = []

206

i = 0

202

i = 0

207

while i + 1 < len(blocks):

203

while i + 1 < len(blocks):

208

# Searching for a block that looks like this:

204

# Searching for a block that looks like this:

209

#

205

#

210

# +-------+---------------------------+

206

# +-------+---------------------------+

211

# | ".. container ::" type |

207

# | ".. container ::" type |

212

# +---+ |

208

# +---+ |

213

# | blocks |

209

# | blocks |

214

# +-------------------------------+

210

# +-------------------------------+

215

if (blocks[i]['type'] == 'paragraph' and

211

if (blocks[i]['type'] == 'paragraph' and

216

blocks[i]['lines'][0].startswith('.. container::')):

212

blocks[i]['lines'][0].startswith('.. container::')):

217

indent = blocks[i]['indent']

213

indent = blocks[i]['indent']

218

adjustment = blocks[i + 1]['indent'] - indent

214

adjustment = blocks[i + 1]['indent'] - indent

219

containertype = blocks[i]['lines'][0][15:]

215

containertype = blocks[i]['lines'][0][15:]

220

prune = containertype not in keep

216

prune = containertype not in keep

221

if prune:

217

if prune:

222

pruned.append(containertype)

218

pruned.append(containertype)

223

219

224

# Always delete "..container:: type" block

220

# Always delete "..container:: type" block

225

del blocks[i]

221

del blocks[i]

226

j = i

222

j = i

227

while j < len(blocks) and blocks[j]['indent'] > indent:

223

while j < len(blocks) and blocks[j]['indent'] > indent:

228

if prune:

224

if prune:

229

del blocks[j]

225

del blocks[j]

230

i -= 1 # adjust outer index

226

i -= 1 # adjust outer index

231

else:

227

else:

232

blocks[j]['indent'] -= adjustment

228

blocks[j]['indent'] -= adjustment

233

j += 1

229

j += 1

234

i += 1

230

i += 1

235

return blocks, pruned

231

return blocks, pruned

236

232

237

233

238

_sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")

234

_sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")

239

235

240

def findsections(blocks):

236

def findsections(blocks):

241

"""Finds sections.

237

"""Finds sections.

242

238

243

The blocks must have a 'type' field, i.e., they should have been

239

The blocks must have a 'type' field, i.e., they should have been

244

run through findliteralblocks first.

240

run through findliteralblocks first.

245

"""

241

"""

246

for block in blocks:

242

for block in blocks:

247

# Searching for a block that looks like this:

243

# Searching for a block that looks like this:

248

#

244

#

249

# +------------------------------+

245

# +------------------------------+

250

# | Section title |

246

# | Section title |

251

# | ------------- |

247

# | ------------- |

252

# +------------------------------+

248

# +------------------------------+

253

if (block['type'] == 'paragraph' and

249

if (block['type'] == 'paragraph' and

254

len(block['lines']) == 2 and

250

len(block['lines']) == 2 and

255

len(block['lines'][0]) == len(block['lines'][1]) and

251

len(block['lines'][0]) == len(block['lines'][1]) and

256

_sectionre.match(block['lines'][1])):

252

_sectionre.match(block['lines'][1])):

257

block['underline'] = block['lines'][1][0]

253

block['underline'] = block['lines'][1][0]

258

block['type'] = 'section'

254

block['type'] = 'section'

259

del block['lines'][1]

255

del block['lines'][1]

260

return blocks

256

return blocks

261

257

262

258

263

def inlineliterals(blocks):

259

def inlineliterals(blocks):

264

substs = [('``', '"')]

260

substs = [('``', '"')]

265

for b in blocks:

261

for b in blocks:

266

if b['type'] in ('paragraph', 'section'):

262

if b['type'] in ('paragraph', 'section'):

267

b['lines'] = [replace(l, substs) for l in b['lines']]

263

b['lines'] = [replace(l, substs) for l in b['lines']]

268

return blocks

264

return blocks

269

265

270

266

271

def hgrole(blocks):

267

def hgrole(blocks):

272

substs = [(':hg:`', '"hg '), ('`', '"')]

268

substs = [(':hg:`', '"hg '), ('`', '"')]

273

for b in blocks:

269

for b in blocks:

274

if b['type'] in ('paragraph', 'section'):

270

if b['type'] in ('paragraph', 'section'):

275

# Turn :hg:`command` into "hg command". This also works

271

# Turn :hg:`command` into "hg command". This also works

276

# when there is a line break in the command and relies on

272

# when there is a line break in the command and relies on

277

# the fact that we have no stray back-quotes in the input

273

# the fact that we have no stray back-quotes in the input

278

# (run the blocks through inlineliterals first).

274

# (run the blocks through inlineliterals first).

279

b['lines'] = [replace(l, substs) for l in b['lines']]

275

b['lines'] = [replace(l, substs) for l in b['lines']]

280

return blocks

276

return blocks

281

277

282

278

283

def addmargins(blocks):

279

def addmargins(blocks):

284

"""Adds empty blocks for vertical spacing.

280

"""Adds empty blocks for vertical spacing.

285

281

286

This groups bullets, options, and definitions together with no vertical

282

This groups bullets, options, and definitions together with no vertical

287

space between them, and adds an empty block between all other blocks.

283

space between them, and adds an empty block between all other blocks.

288

"""

284

"""

289

i = 1

285

i = 1

290

while i < len(blocks):

286

while i < len(blocks):

291

if (blocks[i]['type'] == blocks[i - 1]['type'] and

287

if (blocks[i]['type'] == blocks[i - 1]['type'] and

292

blocks[i]['type'] in ('bullet', 'option', 'field')):

288

blocks[i]['type'] in ('bullet', 'option', 'field')):

293

i += 1

289

i += 1

294

else:

290

else:

295

blocks.insert(i, dict(lines=[''], indent=0, type='margin'))

291

blocks.insert(i, dict(lines=[''], indent=0, type='margin'))

296

i += 2

292

i += 2

297

return blocks

293

return blocks

298

294

299

_admonitionre = re.compile(r"\.\. (admonition|attention|caution|danger|"

295

_admonitionre = re.compile(r"\.\. (admonition|attention|caution|danger|"

300

296

301

flags=re.IGNORECASE)

297

flags=re.IGNORECASE)

302

298

303

def findadmonitions(blocks):

299

def findadmonitions(blocks):

304

"""

300

"""

305

Makes the type of the block an admonition block if

301

Makes the type of the block an admonition block if

306

the first line is an admonition directive

302

the first line is an admonition directive

307

"""

303

"""

308

i = 0

304

i = 0

309

while i < len(blocks):

305

while i < len(blocks):

310

m = _admonitionre.match(blocks[i]['lines'][0])

306

m = _admonitionre.match(blocks[i]['lines'][0])

311

if m:

307

if m:

312

blocks[i]['type'] = 'admonition'

308

blocks[i]['type'] = 'admonition'

313

admonitiontitle = blocks[i]['lines'][0][3:m.end() - 2].lower()

309

admonitiontitle = blocks[i]['lines'][0][3:m.end() - 2].lower()

314

310

315

firstline = blocks[i]['lines'][0][m.end() + 1:]

311

firstline = blocks[i]['lines'][0][m.end() + 1:]

316

if firstline:

312

if firstline:

317

blocks[i]['lines'].insert(1, ' ' + firstline)

313

blocks[i]['lines'].insert(1, ' ' + firstline)

318

314

319

blocks[i]['admonitiontitle'] = admonitiontitle

315

blocks[i]['admonitiontitle'] = admonitiontitle

320

del blocks[i]['lines'][0]

316

del blocks[i]['lines'][0]

321

i = i + 1

317

i = i + 1

322

return blocks

318

return blocks

323

319

324

def formatblock(block, width):

320

def formatblock(block, width):

325

"""Format a block according to width."""

321

"""Format a block according to width."""

326

if width <= 0:

322

if width <= 0:

327

width = 78

323

width = 78

328

indent = ' ' * block['indent']

324

indent = ' ' * block['indent']

329

if block['type'] == 'admonition':

325

if block['type'] == 'admonition':

330

titles = {'attention': _('Attention:'),

326

titles = {'attention': _('Attention:'),

331

'caution': _('Caution:'),

327

'caution': _('Caution:'),

332

'danger': _('!Danger!') ,

328

'danger': _('!Danger!') ,

333

'error': _('Error:'),

329

'error': _('Error:'),

334

'hint': _('Hint:'),

330

'hint': _('Hint:'),

335

'important': _('Important:'),

331

'important': _('Important:'),

336

'note': _('Note:'),

332

'note': _('Note:'),

337

'tip': _('Tip:'),

333

'tip': _('Tip:'),

338

'warning': _('Warning!')}

334

'warning': _('Warning!')}

339

335

340

admonition = titles[block['admonitiontitle']]

336

admonition = titles[block['admonitiontitle']]

341

hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())

337

hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())

342

338

343

defindent = indent + hang * ' '

339

defindent = indent + hang * ' '

344

text = ' '.join(map(str.strip, block['lines']))

340

text = ' '.join(map(str.strip, block['lines']))

345

return '%s\n%s' % (indent + admonition, util.wrap(text, width=width,

341

return '%s\n%s' % (indent + admonition, util.wrap(text, width=width,

346

initindent=defindent,

342

initindent=defindent,

347

hangindent=defindent))

343

hangindent=defindent))

348

if block['type'] == 'margin':

344

if block['type'] == 'margin':

349

return ''

345

return ''

350

if block['type'] == 'literal':

346

if block['type'] == 'literal':

351

indent += ' '

347

indent += ' '

352

return indent + ('\n' + indent).join(block['lines'])

348

return indent + ('\n' + indent).join(block['lines'])

353

if block['type'] == 'section':

349

if block['type'] == 'section':

354

underline = len(block['lines'][0]) * block['underline']

350

underline = len(block['lines'][0]) * block['underline']

355

return "%s%s\n%s%s" % (indent, block['lines'][0],indent, underline)

351

return "%s%s\n%s%s" % (indent, block['lines'][0],indent, underline)

356

if block['type'] == 'definition':

352

if block['type'] == 'definition':

357

term = indent + block['lines'][0]

353

term = indent + block['lines'][0]

358

hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())

354

hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())

359

defindent = indent + hang * ' '

355

defindent = indent + hang * ' '

360

text = ' '.join(map(str.strip, block['lines'][1:]))

356

text = ' '.join(map(str.strip, block['lines'][1:]))

361

return '%s\n%s' % (term, util.wrap(text, width=width,

357

return '%s\n%s' % (term, util.wrap(text, width=width,

362

initindent=defindent,

358

initindent=defindent,

363

hangindent=defindent))

359

hangindent=defindent))

364

subindent = indent

360

subindent = indent

365

if block['type'] == 'bullet':

361

if block['type'] == 'bullet':

366

if block['lines'][0].startswith('| '):

362

if block['lines'][0].startswith('| '):

367

# Remove bullet for line blocks and add no extra

363

# Remove bullet for line blocks and add no extra

368

# indention.

364

# indention.

369

block['lines'][0] = block['lines'][0][2:]

365

block['lines'][0] = block['lines'][0][2:]

370

else:

366

else:

371

m = _bulletre.match(block['lines'][0])

367

m = _bulletre.match(block['lines'][0])

372

subindent = indent + m.end() * ' '

368

subindent = indent + m.end() * ' '

373

elif block['type'] == 'field':

369

elif block['type'] == 'field':

374

keywidth = block['keywidth']

370

keywidth = block['keywidth']

375

key = block['key']

371

key = block['key']

376

372

377

subindent = indent + _fieldwidth * ' '

373

subindent = indent + _fieldwidth * ' '

378

if len(key) + 2 > _fieldwidth:

374

if len(key) + 2 > _fieldwidth:

379

# key too large, use full line width

375

# key too large, use full line width

380

key = key.ljust(width)

376

key = key.ljust(width)

381

elif keywidth + 2 < _fieldwidth:

377

elif keywidth + 2 < _fieldwidth:

382

# all keys are small, add only two spaces

378

# all keys are small, add only two spaces

383

key = key.ljust(keywidth + 2)

379

key = key.ljust(keywidth + 2)

384

subindent = indent + (keywidth + 2) * ' '

380

subindent = indent + (keywidth + 2) * ' '

385

else:

381

else:

386

# mixed sizes, use fieldwidth for this one

382

# mixed sizes, use fieldwidth for this one

387

key = key.ljust(_fieldwidth)

383

key = key.ljust(_fieldwidth)

388

block['lines'][0] = key + block['lines'][0]

384

block['lines'][0] = key + block['lines'][0]

389

elif block['type'] == 'option':

385

elif block['type'] == 'option':

390

m = _optionre.match(block['lines'][0])

386

m = _optionre.match(block['lines'][0])

391

option, arg, rest = m.groups()

387

option, arg, rest = m.groups()

392

subindent = indent + (len(option) + len(arg)) * ' '

388

subindent = indent + (len(option) + len(arg)) * ' '

393

389

394

text = ' '.join(map(str.strip, block['lines']))

390

text = ' '.join(map(str.strip, block['lines']))

395

return util.wrap(text, width=width,

391

return util.wrap(text, width=width,

396

initindent=indent,

392

initindent=indent,

397

hangindent=subindent)

393

hangindent=subindent)

398

394

399

395

400

def format(text, width, indent=0, keep=None):

396

def format(text, width, indent=0, keep=None):

401

"""Parse and format the text according to width."""

397

"""Parse and format the text according to width."""

402

blocks = findblocks(text)

398

blocks = findblocks(text)

403

for b in blocks:

399

for b in blocks:

404

b['indent'] += indent

400

b['indent'] += indent

405

blocks = findliteralblocks(blocks)

401

blocks = findliteralblocks(blocks)

406

blocks, pruned = prunecontainers(blocks, keep or [])

402

blocks, pruned = prunecontainers(blocks, keep or [])

407

blocks = findsections(blocks)

403

blocks = findsections(blocks)

408

blocks = inlineliterals(blocks)

404

blocks = inlineliterals(blocks)

409

blocks = hgrole(blocks)

405

blocks = hgrole(blocks)

410

blocks = splitparagraphs(blocks)

406

blocks = splitparagraphs(blocks)

411

blocks = updatefieldlists(blocks)

407

blocks = updatefieldlists(blocks)

412

blocks = addmargins(blocks)

408

blocks = addmargins(blocks)

413

blocks = findadmonitions(blocks)

409

blocks = findadmonitions(blocks)

414

text = '\n'.join(formatblock(b, width) for b in blocks)

410

text = '\n'.join(formatblock(b, width) for b in blocks)

415

if keep is None:

411

if keep is None:

416

return text

412

return text

417

else:

413

else:

418

return text, pruned

414

return text, pruned

419

415

420

416

421

if __name__ == "__main__":

417

if __name__ == "__main__":

422

from pprint import pprint

418

from pprint import pprint

423

419

424

def debug(func, *args):

420

def debug(func, *args):

425

blocks = func(*args)

421

blocks = func(*args)

426

print "*** after %s:" % func.__name__

422

print "*** after %s:" % func.__name__

427

pprint(blocks)

423

pprint(blocks)

428

print

424

print

429

return blocks

425

return blocks

430

426

431

text = open(sys.argv[1]).read()

427

text = open(sys.argv[1]).read()

432

blocks = debug(findblocks, text)

428

blocks = debug(findblocks, text)

433

blocks = debug(findliteralblocks, blocks)

429

blocks = debug(findliteralblocks, blocks)

434

blocks, pruned = debug(prunecontainers, blocks, sys.argv[2:])

430

blocks, pruned = debug(prunecontainers, blocks, sys.argv[2:])

435

blocks = debug(inlineliterals, blocks)

431

blocks = debug(inlineliterals, blocks)

436

blocks = debug(splitparagraphs, blocks)

432

blocks = debug(splitparagraphs, blocks)

437

blocks = debug(updatefieldlists, blocks)

433

blocks = debug(updatefieldlists, blocks)

438

blocks = debug(findsections, blocks)

434

blocks = debug(findsections, blocks)

439

blocks = debug(addmargins, blocks)

435

blocks = debug(addmargins, blocks)

440

blocks = debug(findadmonitions, blocks)

436

blocks = debug(findadmonitions, blocks)

441

print '\n'.join(formatblock(b, 30) for b in blocks)

437

print '\n'.join(formatblock(b, 30) for b in blocks)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # minirst.py - minimal reStructuredText parser
             #
             # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """simplified reStructuredText parser.
             This parser knows just enough about reStructuredText to parse the
             Mercurial docstrings.
             It cheats in a major way: nested blocks are not really nested. They
             are just indented blocks that look like they are nested. This relies
             on the user to keep the right indentation for the blocks.
             It only supports a small subset of reStructuredText:
             - sections
             - paragraphs
             - literal blocks
             - definition lists
             - specific admonitions
             - bullet lists (items must start with '-')
             - enumerated lists (no autonumbering)
             - field lists (colons cannot be escaped)
             - option lists (supports only long options without arguments)
             - inline literals (no other inline markup is recognized)
             """
             import re, sys
             import util, encoding
             from i18n import _
             def replace(text, substs):
                 """Apply each (old, new) pair in substs to text, in order.

                 The text is decoded with encoding.encoding before the
                 substitutions are made and encoded the same way afterwards.
                 """
                 codec = encoding.encoding
                 decoded = text.decode(codec)
                 for old, new in substs:
                     decoded = decoded.replace(old, new)
                 return decoded.encode(codec)
+            _blockre = re.compile(r"\n(?:\s*\n)+")
             def findblocks(text):
                 """Find continuous blocks of lines in text.
                 Returns a list of dictionaries representing the blocks. Each block
                 has an 'indent' field and a 'lines' field.
                 """
-                blocks = [[]]
+                blocks = []
-                lines = text.splitlines()
+                for b in _blockre.split(text.strip()):
-                for line in lines:
+                    lines = b.splitlines()
-                    if line.strip():
+                    indent = min((len(l) - len(l.lstrip())) for l in lines)
-                        blocks[-1].append(line)
+                    lines = [l[indent:] for l in lines]
-                    elif blocks[-1]:
+                    blocks.append(dict(indent=indent, lines=lines))
-                        blocks.append([])
-                if not blocks[-1]:
-                    del blocks[-1]
-                for i, block in enumerate(blocks):
-                    indent = min((len(l) - len(l.lstrip())) for l in block)
-                    blocks[i] = dict(indent=indent, lines=[l[indent:] for l in block])
                 return blocks
             def findliteralblocks(blocks):
                 """Finds literal blocks and adds a 'type' field to the blocks.

                 Literal blocks are given the type 'literal', all other blocks are
                 given the type 'paragraph'. A literal block is an indented block
                 that follows a paragraph whose last line ends with "::"; the
                 marker is trimmed from that paragraph, and a paragraph that
                 consists of "::" alone is removed.

                 The list is modified in place and also returned.
                 """
                 i = 0
                 while i < len(blocks):
                     # Searching for a block that looks like this:
                     #
                     # +------------------------------+
                     # | paragraph                    |
                     # | (ends with "::")             |
                     # +------------------------------+
                     #    +---------------------------+
                     #    | indented literal block    |
                     #    +---------------------------+
                     blocks[i]['type'] = 'paragraph'
                     if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):
                         indent = blocks[i]['indent']
                         # How far the following block is indented relative to
                         # the introducing paragraph; removed from every literal
                         # block marked below.
                         adjustment = blocks[i + 1]['indent'] - indent
                         if blocks[i]['lines'] == ['::']:
                             # Expanded form: remove block
                             del blocks[i]
                             # NOTE(review): from here on blocks[i] is the block
                             # *before* the removed "::" paragraph (or, through
                             # negative indexing, the last block when the "::"
                             # paragraph came first), so the bullet check below
                             # inspects that block -- confirm this is intended.
                             i -= 1
                         elif blocks[i]['lines'][-1].endswith(' ::'):
                             # Partially minimized form: remove space and both
                             # colons.
                             blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
                         else:
                             # Fully minimized form: remove just one colon.
                             blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]
                         # List items are formatted with a hanging indent. We must
                         # correct for this here while we still have the original
                         # information on the indentation of the subsequent literal
                         # blocks available.
                         m = _bulletre.match(blocks[i]['lines'][0])
                         if m:
                             indent += m.end()
                             adjustment -= m.end()
                         # Mark the following indented blocks.
                         while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                             blocks[i + 1]['type'] = 'literal'
                             blocks[i + 1]['indent'] -= adjustment
                             i += 1
                     i += 1
                 return blocks
             # Item markers for the supported list types; each is matched against
             # the start of a line.
             _bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')
             _optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)?  +)(.*)$')
             _fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')
             _definitionre = re.compile(r'[^ ]')
             def splitparagraphs(blocks):
                 """Break paragraphs that are really lists into one block per item.

                 A 'paragraph' block whose first line starts a bullet, option,
                 field or definition item is replaced, in place, by blocks of that
                 type carrying the paragraph's indent. Other blocks are left
                 untouched. Returns the (modified) list.
                 """
                 # (item type, item regexp, may an item be a single line?), tried
                 # in this order. The definition regexp accepts nearly any line,
                 # so it has to stay last.
                 kinds = [('bullet', _bulletre, True),
                          ('option', _optionre, True),
                          ('field', _fieldre, True),
                          ('definition', _definitionre, False)]
                 def startsitem(lines, pos, itemre, singleline):
                     """Does an item recognised by itemre start at lines[pos]?

                     The line after an item start must be indented; when
                     singleline is True it may also be absent, empty or the
                     start of another item.
                     """
                     if not itemre.match(lines[pos]):
                         return False
                     following = ''
                     if pos + 1 < len(lines):
                         following = lines[pos + 1]
                     if not singleline:
                         return following.startswith(' ')
                     return (following == '' or following[0] == ' '
                             or itemre.match(following))
                 def splititems(block):
                     """Return the item blocks for block, or None if it is no list."""
                     lines = block['lines']
                     for kind, itemre, singleline in kinds:
                         if not startsitem(lines, 0, itemre, singleline):
                             continue
                         items = []
                         for pos, line in enumerate(lines):
                             if startsitem(lines, pos, itemre, singleline):
                                 items.append(dict(type=kind, lines=[],
                                                   indent=block['indent']))
                             # Continuation lines go to the most recent item.
                             items[-1]['lines'].append(line)
                         return items
                     return None
                 index = 0
                 while index < len(blocks):
                     if blocks[index]['type'] == 'paragraph':
                         items = splititems(blocks[index])
                         if items is not None:
                             blocks[index:index + 1] = items
                     index += 1
                 return blocks
             # Column width reserved for field keys when a field list is formatted.
             _fieldwidth = 12
             def updatefieldlists(blocks):
                 """Extract the key of every field block and record key widths.

                 For each run of consecutive 'field' blocks the leading ":key:" is
                 split off the first line and stored under 'key', and the length
                 of the longest key in the run is stored under 'keywidth' on every
                 block of the run. The list is modified in place and returned.
                 """
                 total = len(blocks)
                 start = 0
                 while start < total:
                     if blocks[start]['type'] != 'field':
                         start += 1
                         continue
                     widest = 0
                     stop = start
                     while stop < total and blocks[stop]['type'] == 'field':
                         field = blocks[stop]
                         key, rest = _fieldre.match(field['lines'][0]).groups()
                         field['lines'][0] = rest
                         field['key'] = key
                         widest = max(widest, len(key))
                         stop += 1
                     for field in blocks[start:stop]:
                         field['keywidth'] = widest
                     # blocks[stop], if any, is known not to be a field.
                     start = stop + 1
                 return blocks
             def prunecontainers(blocks, keep):
                 """Prune unwanted containers.

                 A container is a paragraph starting with ".. container:: type"
                 followed by the more deeply indented blocks that form its
                 content. The directive block itself is always removed. If the
                 type is listed in keep, the content is shifted left to the
                 directive's indentation; otherwise the content is removed too
                 and the type is recorded.

                 The blocks must have a 'type' field, i.e., they should have been
                 run through findliteralblocks first.

                 Returns a (blocks, pruned) tuple: the list, modified in place,
                 and the container types that were pruned, in order of appearance.
                 """
                 pruned = []
                 i = 0
                 # A directive in the very last block has no content block after
                 # it and is left alone, hence "i + 1".
                 while i + 1 < len(blocks):
                     # Searching for a block that looks like this:
                     #
                     # +-------+---------------------------+
                     # | ".. container ::" type            |
                     # +---+                               |
                     #     | blocks                        |
                     #     +-------------------------------+
                     block = blocks[i]
                     if not (block['type'] == 'paragraph' and
                             block['lines'][0].startswith('.. container::')):
                         i += 1
                         continue
                     indent = block['indent']
                     adjustment = blocks[i + 1]['indent'] - indent
                     # Skip the 14-character directive and the space after it.
                     containertype = block['lines'][0][15:]
                     prune = containertype not in keep
                     if prune:
                         pruned.append(containertype)
                     # Always delete "..container:: type" block
                     del blocks[i]
                     j = i
                     while j < len(blocks) and blocks[j]['indent'] > indent:
                         if prune:
                             del blocks[j]
                         else:
                             blocks[j]['indent'] -= adjustment
                             j += 1
                     # Do not advance i: whatever now sits at index i (kept
                     # content or the block that followed the container) has not
                     # been examined yet. Stepping the index back once per pruned
                     # block, as was done before, could make it negative or skip
                     # a directive that directly followed an empty container.
                 return blocks, pruned
             # A section underline: one punctuation character repeated for the
             # whole line (at least twice).
             _sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")
             def findsections(blocks):
                 """Turn underlined titles into 'section' blocks.

                 A two-line paragraph whose second line is an underline exactly
                 as long as the first line becomes a block of type 'section': the
                 underline character is stored under 'underline' and the
                 underline line itself is dropped.

                 The blocks must have a 'type' field, i.e., they should have been
                 run through findliteralblocks first. The list is modified in
                 place and returned.
                 """
                 for candidate in blocks:
                     # Looking for:
                     #
                     # +------------------------------+
                     # | Section title                |
                     # | -------------                |
                     # +------------------------------+
                     if candidate['type'] != 'paragraph':
                         continue
                     lines = candidate['lines']
                     if len(lines) != 2:
                         continue
                     title, rule = lines
                     if len(title) != len(rule) or not _sectionre.match(rule):
                         continue
                     candidate['underline'] = rule[0]
                     candidate['type'] = 'section'
                     del lines[1]
                 return blocks
             def inlineliterals(blocks):
                 """Replace the double back-quotes of inline literals by a quote.

                 Only the lines of 'paragraph' and 'section' blocks are rewritten.
                 The list is modified in place and returned.
                 """
                 substs = [('``', '"')]
                 for block in blocks:
                     if block['type'] not in ('paragraph', 'section'):
                         continue
                     block['lines'] = [replace(line, substs)
                                       for line in block['lines']]
                 return blocks
             def hgrole(blocks):
                 """Turn :hg:`command` into "hg command".

                 Only the lines of 'paragraph' and 'section' blocks are rewritten.
                 The list is modified in place and returned.
                 """
                 # Every back-quote that is not part of the role prefix is taken to
                 # be a closing one, which also covers a command broken over two
                 # lines. This relies on there being no stray back-quotes in the
                 # input (run the blocks through inlineliterals first).
                 substs = [(':hg:`', '"hg '), ('`', '"')]
                 for block in blocks:
                     if block['type'] not in ('paragraph', 'section'):
                         continue
                     block['lines'] = [replace(line, substs)
                                       for line in block['lines']]
                 return blocks
             def addmargins(blocks):
                 """Insert empty 'margin' blocks for vertical spacing.

                 Neighbouring bullet, option or field items of the same type are
                 kept together with no space between them; every other pair of
                 adjacent blocks is separated by a margin block. The list is
                 modified in place and returned.
                 """
                 grouped = ('bullet', 'option', 'field')
                 # Walk backwards so that an insertion never shifts the pairs that
                 # are still to be examined.
                 for pos in range(len(blocks) - 1, 0, -1):
                     kind = blocks[pos]['type']
                     if kind == blocks[pos - 1]['type'] and kind in grouped:
                         continue
                     blocks.insert(pos, dict(lines=[''], indent=0, type='margin'))
                 return blocks
             # An admonition directive at the start of a line, in any letter case.
             _admonitionre = re.compile(r"\.\. (admonition|attention|caution|danger|"
                                        r"error|hint|important|note|tip|warning)::",
                                        flags=re.IGNORECASE)
             def findadmonitions(blocks):
                 """Mark blocks that open with an admonition directive.

                 Such a block gets the type 'admonition' and its lower-cased
                 directive name under 'admonitiontitle'. The directive line is
                 removed; text following the directive on that line is kept as a
                 new first line, indented by three spaces. The list is modified
                 in place and returned.
                 """
                 for block in blocks:
                     lines = block['lines']
                     directive = lines[0]
                     m = _admonitionre.match(directive)
                     if not m:
                         continue
                     block['type'] = 'admonition'
                     # The name sits between the leading ".. " and the "::".
                     title = directive[3:m.end() - 2].lower()
                     # Anything after the "::" and the character following it.
                     remainder = directive[m.end() + 1:]
                     if remainder:
                         lines.insert(1, '   ' + remainder)
                     block['admonitiontitle'] = title
                     del lines[0]
                 return blocks
             def formatblock(block, width):
                 """Format a block according to width.

                 block is a dictionary with at least 'type', 'indent' and 'lines'.
                 Admonition blocks also need 'admonitiontitle', section blocks
                 'underline', and field blocks 'key' and 'keywidth'. A width of
                 zero or less selects the default of 78 columns.

                 Returns the formatted text. The first line of line-block bullets
                 ("| ...") and of field blocks is rewritten in place while
                 formatting.
                 """
                 if width <= 0:
                     width = 78
                 indent = ' ' * block['indent']
                 if block['type'] == 'admonition':
                     titles = {'attention': _('Attention:'),
                               'caution': _('Caution:'),
                               'danger': _('!Danger!'),
                               'error': _('Error:'),
                               'hint': _('Hint:'),
                               'important': _('Important:'),
                               'note': _('Note:'),
                               'tip': _('Tip:'),
                               'warning': _('Warning!')}
                     # NOTE(review): _admonitionre also accepts the generic
                     # "admonition" directive, which has no title here and so
                     # raises KeyError -- decide what it should display.
                     admonition = titles[block['admonitiontitle']]
                     if not block['lines']:
                         # The directive carried no text at all (its own line has
                         # already been removed): show just the title instead of
                         # failing on the missing last line below.
                         return indent + admonition + '\n'
                     # The body is indented like its last line.
                     hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())
                     defindent = indent + hang * ' '
                     text = ' '.join(map(str.strip, block['lines']))
                     return '%s\n%s' % (indent + admonition, util.wrap(text, width=width,
                                                        initindent=defindent,
                                                        hangindent=defindent))
                 if block['type'] == 'margin':
                     return ''
                 if block['type'] == 'literal':
                     # Literal text is never re-wrapped, only indented two more
                     # columns.
                     indent += '  '
                     return indent + ('\n' + indent).join(block['lines'])
                 if block['type'] == 'section':
                     underline = len(block['lines'][0]) * block['underline']
                     return "%s%s\n%s%s" % (indent, block['lines'][0],indent, underline)
                 if block['type'] == 'definition':
                     term = indent + block['lines'][0]
                     # The definition is indented like its last line.
                     hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())
                     defindent = indent + hang * ' '
                     text = ' '.join(map(str.strip, block['lines'][1:]))
                     return '%s\n%s' % (term, util.wrap(text, width=width,
                                                        initindent=defindent,
                                                        hangindent=defindent))
                 # Everything else is wrapped as one paragraph; only the hanging
                 # indent of the continuation lines depends on the type.
                 subindent = indent
                 if block['type'] == 'bullet':
                     if block['lines'][0].startswith('| '):
                         # Remove bullet for line blocks and add no extra
                         # indention.
                         block['lines'][0] = block['lines'][0][2:]
                     else:
                         m = _bulletre.match(block['lines'][0])
                         subindent = indent + m.end() * ' '
                 elif block['type'] == 'field':
                     keywidth = block['keywidth']
                     key = block['key']
                     subindent = indent + _fieldwidth * ' '
                     if len(key) + 2 > _fieldwidth:
                         # key too large, use full line width
                         key = key.ljust(width)
                     elif keywidth + 2 < _fieldwidth:
                         # all keys are small, add only two spaces
                         key = key.ljust(keywidth + 2)
                         subindent = indent + (keywidth + 2) * ' '
                     else:
                         # mixed sizes, use fieldwidth for this one
                         key = key.ljust(_fieldwidth)
                     block['lines'][0] = key + block['lines'][0]
                 elif block['type'] == 'option':
                     m = _optionre.match(block['lines'][0])
                     option, arg, rest = m.groups()
                     # Continuation lines line up with the option description.
                     subindent = indent + (len(option) + len(arg)) * ' '
                 text = ' '.join(map(str.strip, block['lines']))
                 return util.wrap(text, width=width,
                                  initindent=indent,
                                  hangindent=subindent)
             def format(text, width, indent=0, keep=None):
                 """Parse and format the text according to width.

                 indent is added to the indentation of every block. keep lists the
                 container types whose content is retained; other containers are
                 pruned.

                 Returns the formatted text, or a (text, pruned) tuple when keep
                 is given (anything but None), where pruned is the list of
                 container types that were removed.
                 """
                 blocks = findblocks(text)
                 for b in blocks:
                     b['indent'] += indent
                 blocks = findliteralblocks(blocks)
                 blocks, pruned = prunecontainers(blocks, keep or [])
                 # The remaining passes each take and return the block list.
                 passes = (findsections, inlineliterals, hgrole, splitparagraphs,
                           updatefieldlists, addmargins, findadmonitions)
                 for step in passes:
                     blocks = step(blocks)
                 formatted = [formatblock(b, width) for b in blocks]
                 text = '\n'.join(formatted)
                 if keep is not None:
                     return text, pruned
                 return text
             if __name__ == "__main__":
                 from pprint import pprint
                 def debug(func, *args):
                     blocks = func(*args)
                     print "*** after %s:" % func.__name__
                     pprint(blocks)
                     print
                     return blocks
                 text = open(sys.argv[1]).read()
                 blocks = debug(findblocks, text)
                 blocks = debug(findliteralblocks, blocks)
                 blocks, pruned = debug(prunecontainers, blocks, sys.argv[2:])
                 blocks = debug(inlineliterals, blocks)
                 blocks = debug(splitparagraphs, blocks)
                 blocks = debug(updatefieldlists, blocks)
                 blocks = debug(findsections, blocks)
                 blocks = debug(addmargins, blocks)
                 blocks = debug(findadmonitions, blocks)
                 print '\n'.join(formatblock(b, 30) for b in blocks)