upstream/mercurial-mirror Commit - r11464:521c8e0c

1

# minirst.py - minimal reStructuredText parser

1

# minirst.py - minimal reStructuredText parser

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

"""simplified reStructuredText parser.

8

"""simplified reStructuredText parser.

9

10

This parser knows just enough about reStructuredText to parse the

10

This parser knows just enough about reStructuredText to parse the

11

Mercurial docstrings.

11

Mercurial docstrings.

12

13

It cheats in a major way: nested blocks are not really nested. They

13

It cheats in a major way: nested blocks are not really nested. They

14

are just indented blocks that look like they are nested. This relies

14

are just indented blocks that look like they are nested. This relies

15

on the user to keep the right indentation for the blocks.

15

on the user to keep the right indentation for the blocks.

16

17

It only supports a small subset of reStructuredText:

17

It only supports a small subset of reStructuredText:

18

19

- sections

19

- sections

20

21

- paragraphs

21

- paragraphs

22

23

- literal blocks

23

- literal blocks

24

25

- definition lists

25

- definition lists

26

27

- bullet lists (items must start with '-')

27

- bullet lists (items must start with '-')

28

29

- enumerated lists (no autonumbering)

29

- enumerated lists (no autonumbering)

30

31

- field lists (colons cannot be escaped)

31

- field lists (colons cannot be escaped)

32

33

- option lists (supports only long options without arguments)

33

- option lists (supports only long options without arguments)

34

35

- inline literals (no other inline markup is recognized)

35

- inline literals (no other inline markup is recognized)

36

"""

36

"""

37

38

import re, sys

38

import re, sys

39

import util

39

import util, encoding

40

41

def replace(text, substs):

42

utext = text.decode(encoding.encoding)

43

for f, t in substs:

44

utext = utext.replace(f, t)

45

return utext.encode(encoding.encoding)

40

46

41

def findblocks(text):

47

def findblocks(text):

42

"""Find continuous blocks of lines in text.

48

"""Find continuous blocks of lines in text.

43

49

44

Returns a list of dictionaries representing the blocks. Each block

50

Returns a list of dictionaries representing the blocks. Each block

45

has an 'indent' field and a 'lines' field.

51

has an 'indent' field and a 'lines' field.

46

"""

52

"""

47

blocks = [[]]

53

blocks = [[]]

48

lines = text.splitlines()

54

lines = text.splitlines()

49

for line in lines:

55

for line in lines:

50

if line.strip():

56

if line.strip():

51

blocks[-1].append(line)

57

blocks[-1].append(line)

52

elif blocks[-1]:

58

elif blocks[-1]:

53

blocks.append([])

59

blocks.append([])

54

if not blocks[-1]:

60

if not blocks[-1]:

55

del blocks[-1]

61

del blocks[-1]

56

62

57

for i, block in enumerate(blocks):

63

for i, block in enumerate(blocks):

58

indent = min((len(l) - len(l.lstrip())) for l in block)

64

indent = min((len(l) - len(l.lstrip())) for l in block)

59

blocks[i] = dict(indent=indent, lines=[l[indent:] for l in block])

65

blocks[i] = dict(indent=indent, lines=[l[indent:] for l in block])

60

return blocks

66

return blocks

61

67

62

68

63

def findliteralblocks(blocks):

69

def findliteralblocks(blocks):

64

"""Finds literal blocks and adds a 'type' field to the blocks.

70

"""Finds literal blocks and adds a 'type' field to the blocks.

65

71

66

Literal blocks are given the type 'literal', all other blocks are

72

Literal blocks are given the type 'literal', all other blocks are

67

given type the 'paragraph'.

73

given type the 'paragraph'.

68

"""

74

"""

69

i = 0

75

i = 0

70

while i < len(blocks):

76

while i < len(blocks):

71

# Searching for a block that looks like this:

77

# Searching for a block that looks like this:

72

#

78

#

73

# +------------------------------+

79

# +------------------------------+

74

# | paragraph |

80

# | paragraph |

75

# | (ends with "::") |

81

# | (ends with "::") |

76

# +------------------------------+

82

# +------------------------------+

77

# +---------------------------+

83

# +---------------------------+

78

# | indented literal block |

84

# | indented literal block |

79

# +---------------------------+

85

# +---------------------------+

80

blocks[i]['type'] = 'paragraph'

86

blocks[i]['type'] = 'paragraph'

81

if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):

87

if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):

82

indent = blocks[i]['indent']

88

indent = blocks[i]['indent']

83

adjustment = blocks[i + 1]['indent'] - indent

89

adjustment = blocks[i + 1]['indent'] - indent

84

90

85

if blocks[i]['lines'] == ['::']:

91

if blocks[i]['lines'] == ['::']:

86

# Expanded form: remove block

92

# Expanded form: remove block

87

del blocks[i]

93

del blocks[i]

88

i -= 1

94

i -= 1

89

elif blocks[i]['lines'][-1].endswith(' ::'):

95

elif blocks[i]['lines'][-1].endswith(' ::'):

90

# Partially minimized form: remove space and both

96

# Partially minimized form: remove space and both

91

# colons.

97

# colons.

92

blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]

98

blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]

93

else:

99

else:

94

# Fully minimized form: remove just one colon.

100

# Fully minimized form: remove just one colon.

95

blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

101

blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

96

102

97

# List items are formatted with a hanging indent. We must

103

# List items are formatted with a hanging indent. We must

98

# correct for this here while we still have the original

104

# correct for this here while we still have the original

99

# information on the indentation of the subsequent literal

105

# information on the indentation of the subsequent literal

100

# blocks available.

106

# blocks available.

101

m = _bulletre.match(blocks[i]['lines'][0])

107

m = _bulletre.match(blocks[i]['lines'][0])

102

if m:

108

if m:

103

indent += m.end()

109

indent += m.end()

104

adjustment -= m.end()

110

adjustment -= m.end()

105

111

106

# Mark the following indented blocks.

112

# Mark the following indented blocks.

107

while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:

113

while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:

108

blocks[i + 1]['type'] = 'literal'

114

blocks[i + 1]['type'] = 'literal'

109

blocks[i + 1]['indent'] -= adjustment

115

blocks[i + 1]['indent'] -= adjustment

110

i += 1

116

i += 1

111

i += 1

117

i += 1

112

return blocks

118

return blocks

113

119

114

_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')

120

_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')

115

_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')

121

_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')

116

_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')

122

_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')

117

_definitionre = re.compile(r'[^ ]')

123

_definitionre = re.compile(r'[^ ]')

118

124

119

def splitparagraphs(blocks):

125

def splitparagraphs(blocks):

120

"""Split paragraphs into lists."""

126

"""Split paragraphs into lists."""

121

# Tuples with (list type, item regexp, single line items?). Order

127

# Tuples with (list type, item regexp, single line items?). Order

122

# matters: definition lists has the least specific regexp and must

128

# matters: definition lists has the least specific regexp and must

123

# come last.

129

# come last.

124

listtypes = [('bullet', _bulletre, True),

130

listtypes = [('bullet', _bulletre, True),

125

('option', _optionre, True),

131

('option', _optionre, True),

126

('field', _fieldre, True),

132

('field', _fieldre, True),

127

('definition', _definitionre, False)]

133

('definition', _definitionre, False)]

128

134

129

def match(lines, i, itemre, singleline):

135

def match(lines, i, itemre, singleline):

130

"""Does itemre match an item at line i?

136

"""Does itemre match an item at line i?

131

137

132

A list item can be followed by an indented line or another list

138

A list item can be followed by an indented line or another list

133

item (but only if singleline is True).

139

item (but only if singleline is True).

134

"""

140

"""

135

line1 = lines[i]

141

line1 = lines[i]

136

line2 = i + 1 < len(lines) and lines[i + 1] or ''

142

line2 = i + 1 < len(lines) and lines[i + 1] or ''

137

if not itemre.match(line1):

143

if not itemre.match(line1):

138

return False

144

return False

139

if singleline:

145

if singleline:

140

return line2 == '' or line2[0] == ' ' or itemre.match(line2)

146

return line2 == '' or line2[0] == ' ' or itemre.match(line2)

141

else:

147

else:

142

return line2.startswith(' ')

148

return line2.startswith(' ')

143

149

144

i = 0

150

i = 0

145

while i < len(blocks):

151

while i < len(blocks):

146

if blocks[i]['type'] == 'paragraph':

152

if blocks[i]['type'] == 'paragraph':

147

lines = blocks[i]['lines']

153

lines = blocks[i]['lines']

148

for type, itemre, singleline in listtypes:

154

for type, itemre, singleline in listtypes:

149

if match(lines, 0, itemre, singleline):

155

if match(lines, 0, itemre, singleline):

150

items = []

156

items = []

151

for j, line in enumerate(lines):

157

for j, line in enumerate(lines):

152

if match(lines, j, itemre, singleline):

158

if match(lines, j, itemre, singleline):

153

items.append(dict(type=type, lines=[],

159

items.append(dict(type=type, lines=[],

154

indent=blocks[i]['indent']))

160

indent=blocks[i]['indent']))

155

items[-1]['lines'].append(line)

161

items[-1]['lines'].append(line)

156

blocks[i:i + 1] = items

162

blocks[i:i + 1] = items

157

break

163

break

158

i += 1

164

i += 1

159

return blocks

165

return blocks

160

166

161

167

162

_fieldwidth = 12

168

_fieldwidth = 12

163

169

164

def updatefieldlists(blocks):

170

def updatefieldlists(blocks):

165

"""Find key and maximum key width for field lists."""

171

"""Find key and maximum key width for field lists."""

166

i = 0

172

i = 0

167

while i < len(blocks):

173

while i < len(blocks):

168

if blocks[i]['type'] != 'field':

174

if blocks[i]['type'] != 'field':

169

i += 1

175

i += 1

170

continue

176

continue

171

177

172

keywidth = 0

178

keywidth = 0

173

j = i

179

j = i

174

while j < len(blocks) and blocks[j]['type'] == 'field':

180

while j < len(blocks) and blocks[j]['type'] == 'field':

175

m = _fieldre.match(blocks[j]['lines'][0])

181

m = _fieldre.match(blocks[j]['lines'][0])

176

key, rest = m.groups()

182

key, rest = m.groups()

177

blocks[j]['lines'][0] = rest

183

blocks[j]['lines'][0] = rest

178

blocks[j]['key'] = key

184

blocks[j]['key'] = key

179

keywidth = max(keywidth, len(key))

185

keywidth = max(keywidth, len(key))

180

j += 1

186

j += 1

181

187

182

for block in blocks[i:j]:

188

for block in blocks[i:j]:

183

block['keywidth'] = keywidth

189

block['keywidth'] = keywidth

184

i = j + 1

190

i = j + 1

185

191

186

return blocks

192

return blocks

187

193

188

194

189

def prunecontainers(blocks, keep):

195

def prunecontainers(blocks, keep):

190

"""Prune unwanted containers.

196

"""Prune unwanted containers.

191

197

192

The blocks must have a 'type' field, i.e., they should have been

198

The blocks must have a 'type' field, i.e., they should have been

193

run through findliteralblocks first.

199

run through findliteralblocks first.

194

"""

200

"""

195

pruned = []

201

pruned = []

196

i = 0

202

i = 0

197

while i + 1 < len(blocks):

203

while i + 1 < len(blocks):

198

# Searching for a block that looks like this:

204

# Searching for a block that looks like this:

199

#

205

#

200

# +-------+---------------------------+

206

# +-------+---------------------------+

201

# | ".. container ::" type |

207

# | ".. container ::" type |

202

# +---+ |

208

# +---+ |

203

# | blocks |

209

# | blocks |

204

# +-------------------------------+

210

# +-------------------------------+

205

if (blocks[i]['type'] == 'paragraph' and

211

if (blocks[i]['type'] == 'paragraph' and

206

blocks[i]['lines'][0].startswith('.. container::')):

212

blocks[i]['lines'][0].startswith('.. container::')):

207

indent = blocks[i]['indent']

213

indent = blocks[i]['indent']

208

adjustment = blocks[i + 1]['indent'] - indent

214

adjustment = blocks[i + 1]['indent'] - indent

209

containertype = blocks[i]['lines'][0][15:]

215

containertype = blocks[i]['lines'][0][15:]

210

prune = containertype not in keep

216

prune = containertype not in keep

211

if prune:

217

if prune:

212

pruned.append(containertype)

218

pruned.append(containertype)

213

219

214

# Always delete "..container:: type" block

220

# Always delete "..container:: type" block

215

del blocks[i]

221

del blocks[i]

216

j = i

222

j = i

217

while j < len(blocks) and blocks[j]['indent'] > indent:

223

while j < len(blocks) and blocks[j]['indent'] > indent:

218

if prune:

224

if prune:

219

del blocks[j]

225

del blocks[j]

220

i -= 1 # adjust outer index

226

i -= 1 # adjust outer index

221

else:

227

else:

222

blocks[j]['indent'] -= adjustment

228

blocks[j]['indent'] -= adjustment

223

j += 1

229

j += 1

224

i += 1

230

i += 1

225

return blocks, pruned

231

return blocks, pruned

226

232

227

233

228

_sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")

234

_sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")

229

235

230

def findsections(blocks):

236

def findsections(blocks):

231

"""Finds sections.

237

"""Finds sections.

232

238

233

The blocks must have a 'type' field, i.e., they should have been

239

The blocks must have a 'type' field, i.e., they should have been

234

run through findliteralblocks first.

240

run through findliteralblocks first.

235

"""

241

"""

236

for block in blocks:

242

for block in blocks:

237

# Searching for a block that looks like this:

243

# Searching for a block that looks like this:

238

#

244

#

239

# +------------------------------+

245

# +------------------------------+

240

# | Section title |

246

# | Section title |

241

# | ------------- |

247

# | ------------- |

242

# +------------------------------+

248

# +------------------------------+

243

if (block['type'] == 'paragraph' and

249

if (block['type'] == 'paragraph' and

244

len(block['lines']) == 2 and

250

len(block['lines']) == 2 and

245

len(block['lines'][0]) == len(block['lines'][1]) and

251

len(block['lines'][0]) == len(block['lines'][1]) and

246

_sectionre.match(block['lines'][1])):

252

_sectionre.match(block['lines'][1])):

247

block['underline'] = block['lines'][1][0]

253

block['underline'] = block['lines'][1][0]

248

block['type'] = 'section'

254

block['type'] = 'section'

249

del block['lines'][1]

255

del block['lines'][1]

250

return blocks

256

return blocks

251

257

252

258

253

def inlineliterals(blocks):

259

def inlineliterals(blocks):

260

substs = [('``', '"')]

254

for b in blocks:

261

for b in blocks:

255

if b['type'] in ('paragraph', 'section'):

262

if b['type'] in ('paragraph', 'section'):

256

b['lines'] = [l.replace('``', '"') for l in b['lines']]

263

b['lines'] = [replace(l, substs) for l in b['lines']]

257

return blocks

264

return blocks

258

265

259

266

260

def hgrole(blocks):

267

def hgrole(blocks):

268

substs = [(':hg:`', '"hg '), ('`', '"')]

261

for b in blocks:

269

for b in blocks:

262

if b['type'] in ('paragraph', 'section'):

270

if b['type'] in ('paragraph', 'section'):

263

# Turn :hg:`command` into "hg command". This also works

271

# Turn :hg:`command` into "hg command". This also works

264

# when there is a line break in the command and relies on

272

# when there is a line break in the command and relies on

265

# the fact that we have no stray back-quotes in the input

273

# the fact that we have no stray back-quotes in the input

266

# (run the blocks through inlineliterals first).

274

# (run the blocks through inlineliterals first).

267

b['lines'] = [l.replace(':hg:`', '"hg ').replace('`', '"')

275

b['lines'] = [replace(l, substs) for l in b['lines']]

268

for l in b['lines']]

269

return blocks

276

return blocks

270

277

271

278

272

def addmargins(blocks):

279

def addmargins(blocks):

273

"""Adds empty blocks for vertical spacing.

280

"""Adds empty blocks for vertical spacing.

274

281

275

This groups bullets, options, and definitions together with no vertical

282

This groups bullets, options, and definitions together with no vertical

276

space between them, and adds an empty block between all other blocks.

283

space between them, and adds an empty block between all other blocks.

277

"""

284

"""

278

i = 1

285

i = 1

279

while i < len(blocks):

286

while i < len(blocks):

280

if (blocks[i]['type'] == blocks[i - 1]['type'] and

287

if (blocks[i]['type'] == blocks[i - 1]['type'] and

281

blocks[i]['type'] in ('bullet', 'option', 'field')):

288

blocks[i]['type'] in ('bullet', 'option', 'field')):

282

i += 1

289

i += 1

283

else:

290

else:

284

blocks.insert(i, dict(lines=[''], indent=0, type='margin'))

291

blocks.insert(i, dict(lines=[''], indent=0, type='margin'))

285

i += 2

292

i += 2

286

return blocks

293

return blocks

287

294

288

295

289

def formatblock(block, width):

296

def formatblock(block, width):

290

"""Format a block according to width."""

297

"""Format a block according to width."""

291

if width <= 0:

298

if width <= 0:

292

width = 78

299

width = 78

293

indent = ' ' * block['indent']

300

indent = ' ' * block['indent']

294

if block['type'] == 'margin':

301

if block['type'] == 'margin':

295

return ''

302

return ''

296

if block['type'] == 'literal':

303

if block['type'] == 'literal':

297

indent += ' '

304

indent += ' '

298

return indent + ('\n' + indent).join(block['lines'])

305

return indent + ('\n' + indent).join(block['lines'])

299

if block['type'] == 'section':

306

if block['type'] == 'section':

300

underline = len(block['lines'][0]) * block['underline']

307

underline = len(block['lines'][0]) * block['underline']

301

return "%s%s\n%s%s" % (indent, block['lines'][0],indent, underline)

308

return "%s%s\n%s%s" % (indent, block['lines'][0],indent, underline)

302

if block['type'] == 'definition':

309

if block['type'] == 'definition':

303

term = indent + block['lines'][0]

310

term = indent + block['lines'][0]

304

hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())

311

hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())

305

defindent = indent + hang * ' '

312

defindent = indent + hang * ' '

306

text = ' '.join(map(str.strip, block['lines'][1:]))

313

text = ' '.join(map(str.strip, block['lines'][1:]))

307

return '%s\n%s' % (term, util.wrap(text, width=width,

314

return '%s\n%s' % (term, util.wrap(text, width=width,

308

initindent=defindent,

315

initindent=defindent,

309

hangindent=defindent))

316

hangindent=defindent))

310

subindent = indent

317

subindent = indent

311

if block['type'] == 'bullet':

318

if block['type'] == 'bullet':

312

if block['lines'][0].startswith('| '):

319

if block['lines'][0].startswith('| '):

313

# Remove bullet for line blocks and add no extra

320

# Remove bullet for line blocks and add no extra

314

# indention.

321

# indention.

315

block['lines'][0] = block['lines'][0][2:]

322

block['lines'][0] = block['lines'][0][2:]

316

else:

323

else:

317

m = _bulletre.match(block['lines'][0])

324

m = _bulletre.match(block['lines'][0])

318

subindent = indent + m.end() * ' '

325

subindent = indent + m.end() * ' '

319

elif block['type'] == 'field':

326

elif block['type'] == 'field':

320

keywidth = block['keywidth']

327

keywidth = block['keywidth']

321

key = block['key']

328

key = block['key']

322

329

323

subindent = indent + _fieldwidth * ' '

330

subindent = indent + _fieldwidth * ' '

324

if len(key) + 2 > _fieldwidth:

331

if len(key) + 2 > _fieldwidth:

325

# key too large, use full line width

332

# key too large, use full line width

326

key = key.ljust(width)

333

key = key.ljust(width)

327

elif keywidth + 2 < _fieldwidth:

334

elif keywidth + 2 < _fieldwidth:

328

# all keys are small, add only two spaces

335

# all keys are small, add only two spaces

329

key = key.ljust(keywidth + 2)

336

key = key.ljust(keywidth + 2)

330

subindent = indent + (keywidth + 2) * ' '

337

subindent = indent + (keywidth + 2) * ' '

331

else:

338

else:

332

# mixed sizes, use fieldwidth for this one

339

# mixed sizes, use fieldwidth for this one

333

key = key.ljust(_fieldwidth)

340

key = key.ljust(_fieldwidth)

334

block['lines'][0] = key + block['lines'][0]

341

block['lines'][0] = key + block['lines'][0]

335

elif block['type'] == 'option':

342

elif block['type'] == 'option':

336

m = _optionre.match(block['lines'][0])

343

m = _optionre.match(block['lines'][0])

337

option, arg, rest = m.groups()

344

option, arg, rest = m.groups()

338

subindent = indent + (len(option) + len(arg)) * ' '

345

subindent = indent + (len(option) + len(arg)) * ' '

339

346

340

text = ' '.join(map(str.strip, block['lines']))

347

text = ' '.join(map(str.strip, block['lines']))

341

return util.wrap(text, width=width,

348

return util.wrap(text, width=width,

342

initindent=indent,

349

initindent=indent,

343

hangindent=subindent)

350

hangindent=subindent)

344

351

345

352

346

def format(text, width, indent=0, keep=None):

353

def format(text, width, indent=0, keep=None):

347

"""Parse and format the text according to width."""

354

"""Parse and format the text according to width."""

348

blocks = findblocks(text)

355

blocks = findblocks(text)

349

for b in blocks:

356

for b in blocks:

350

b['indent'] += indent

357

b['indent'] += indent

351

blocks = findliteralblocks(blocks)

358

blocks = findliteralblocks(blocks)

352

blocks, pruned = prunecontainers(blocks, keep or [])

359

blocks, pruned = prunecontainers(blocks, keep or [])

353

blocks = findsections(blocks)

360

blocks = findsections(blocks)

354

blocks = inlineliterals(blocks)

361

blocks = inlineliterals(blocks)

355

blocks = hgrole(blocks)

362

blocks = hgrole(blocks)

356

blocks = splitparagraphs(blocks)

363

blocks = splitparagraphs(blocks)

357

blocks = updatefieldlists(blocks)

364

blocks = updatefieldlists(blocks)

358

blocks = addmargins(blocks)

365

blocks = addmargins(blocks)

359

text = '\n'.join(formatblock(b, width) for b in blocks)

366

text = '\n'.join(formatblock(b, width) for b in blocks)

360

if keep is None:

367

if keep is None:

361

return text

368

return text

362

else:

369

else:

363

return text, pruned

370

return text, pruned

364

371

365

372

366

if __name__ == "__main__":

373

if __name__ == "__main__":

367

from pprint import pprint

374

from pprint import pprint

368

375

369

def debug(func, *args):

376

def debug(func, *args):

370

blocks = func(*args)

377

blocks = func(*args)

371

print "*** after %s:" % func.__name__

378

print "*** after %s:" % func.__name__

372

pprint(blocks)

379

pprint(blocks)

373

print

380

print

374

return blocks

381

return blocks

375

382

376

text = open(sys.argv[1]).read()

383

text = open(sys.argv[1]).read()

377

blocks = debug(findblocks, text)

384

blocks = debug(findblocks, text)

378

blocks = debug(findliteralblocks, blocks)

385

blocks = debug(findliteralblocks, blocks)

379

blocks, pruned = debug(prunecontainers, blocks, sys.argv[2:])

386

blocks, pruned = debug(prunecontainers, blocks, sys.argv[2:])

380

blocks = debug(inlineliterals, blocks)

387

blocks = debug(inlineliterals, blocks)

381

blocks = debug(splitparagraphs, blocks)

388

blocks = debug(splitparagraphs, blocks)

382

blocks = debug(updatefieldlists, blocks)

389

blocks = debug(updatefieldlists, blocks)

383

blocks = debug(findsections, blocks)

390

blocks = debug(findsections, blocks)

384

blocks = debug(addmargins, blocks)

391

blocks = debug(addmargins, blocks)

385

print '\n'.join(formatblock(b, 30) for b in blocks)

392

print '\n'.join(formatblock(b, 30) for b in blocks)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # minirst.py - minimal reStructuredText parser
             #
             # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """simplified reStructuredText parser.
             This parser knows just enough about reStructuredText to parse the
             Mercurial docstrings.
             It cheats in a major way: nested blocks are not really nested. They
             are just indented blocks that look like they are nested. This relies
             on the user to keep the right indentation for the blocks.
             It only supports a small subset of reStructuredText:
             - sections
             - paragraphs
             - literal blocks
             - definition lists
             - bullet lists (items must start with '-')
             - enumerated lists (no autonumbering)
             - field lists (colons cannot be escaped)
             - option lists (supports only long options without arguments)
             - inline literals (no other inline markup is recognized)
             """
             import re, sys
-            import util
+            import util, encoding
+            def replace(text, substs):
+                utext = text.decode(encoding.encoding)
+                for f, t in substs:
+                    utext = utext.replace(f, t)
+                return utext.encode(encoding.encoding)
             def findblocks(text):
                 """Split text into blocks of consecutive non-blank lines.

                 Blank (whitespace-only) lines separate the blocks and are dropped.
                 Returns a list of dictionaries, one per block. Each has an 'indent'
                 field (the smallest amount of leading whitespace among the block's
                 lines) and a 'lines' field (the lines with that common indentation
                 cut off).
                 """
                 groups = []
                 current = []
                 for line in text.splitlines():
                     if line.strip():
                         current.append(line)
                     elif current:
                         # A blank line closes the block collected so far.
                         groups.append(current)
                         current = []
                 if current:
                     groups.append(current)

                 result = []
                 for group in groups:
                     margin = min([len(l) - len(l.lstrip()) for l in group])
                     result.append({'indent': margin,
                                    'lines': [l[margin:] for l in group]})
                 return result
             def findliteralblocks(blocks):
                 """Finds literal blocks and adds a 'type' field to the blocks.
                 Literal blocks are given the type 'literal', all other blocks are
                 given the type 'paragraph'.

                 blocks is a list of dictionaries with 'indent' and 'lines' fields
                 (the shape produced by findblocks). The list and its dictionaries
                 are modified in place: the "::" marker is stripped from the
                 introducing paragraph (or the paragraph is removed when it
                 consists of "::" only) and the indentation of the literal blocks
                 is rewritten. The same list is returned.
                 """
                 i = 0
                 while i < len(blocks):
                     # Searching for a block that looks like this:
                     #
                     # +------------------------------+
                     # | paragraph                    |
                     # | (ends with "::")             |
                     # +------------------------------+
                     #    +---------------------------+
                     #    | indented literal block    |
                     #    +---------------------------+
                     blocks[i]['type'] = 'paragraph'
                     if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):
                         indent = blocks[i]['indent']
                         # Amount removed from the indentation of every literal
                         # block below, measured against the first following block.
                         adjustment = blocks[i + 1]['indent'] - indent
                         if blocks[i]['lines'] == ['::']:
                             # Expanded form: remove block
                             del blocks[i]
                             # From here on blocks[i] is the block *before* the
                             # removed one.
                             # NOTE(review): when the "::" block was the first
                             # block, i becomes -1 and blocks[i] below refers to
                             # the last block of the list -- confirm this is
                             # intended.
                             i -= 1
                         elif blocks[i]['lines'][-1].endswith(' ::'):
                             # Partially minimized form: remove space and both
                             # colons.
                             blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
                         else:
                             # Fully minimized form: remove just one colon.
                             blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]
                         # List items are formatted with a hanging indent. We must
                         # correct for this here while we still have the original
                         # information on the indentation of the subsequent literal
                         # blocks available.
                         m = _bulletre.match(blocks[i]['lines'][0])
                         if m:
                             # m.end() is the width of the list marker including
                             # its trailing space.
                             indent += m.end()
                             adjustment -= m.end()
                         # Mark the following indented blocks.
                         while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                             blocks[i + 1]['type'] = 'literal'
                             blocks[i + 1]['indent'] -= adjustment
                             # Advancing i here makes the outer loop skip the
                             # blocks just marked, so they keep type 'literal'.
                             i += 1
                     i += 1
                 return blocks
             # List item marker followed by one space: "-", an alphanumeric run
             # followed by ".", an alphanumeric run followed by ")" (optionally
             # preceded by "("), or "|" (handled as a line block by formatblock).
             _bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')
             # Option list item. Group 1 is the long option ("--name"), group 2 an
             # optional argument (introduced by " " or "=") together with the run
             # of two or more spaces that separates it from the description, and
             # group 3 the description.
             _optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)?  +)(.*)$')
             # Field list item of the form ":key: text". The key contains no
             # colon, does not start with ":" or a space and does not end with a
             # space. Group 1 is the key, group 2 the text.
             _fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')
             # Definition list term: any line that does not start with a space.
             _definitionre = re.compile(r'[^ ]')
             def splitparagraphs(blocks):
                 """Break paragraphs that are really lists into one block per item.

                 Every block of type 'paragraph' whose first line starts a list item
                 is replaced, in place, by a sequence of blocks of type 'bullet',
                 'option', 'field' or 'definition'. Each new block holds the lines
                 of one item and inherits the indent of the paragraph. The modified
                 list is returned.
                 """
                 # (list type, item regexp, single line items?). Order matters:
                 # the definition regexp is the least specific one and therefore
                 # has to be tried last.
                 listtypes = [('bullet', _bulletre, True),
                              ('option', _optionre, True),
                              ('field', _fieldre, True),
                              ('definition', _definitionre, False)]

                 def startsitem(lines, pos, itemre, singleline):
                     """Tell whether an item matching itemre starts at lines[pos].

                     The item line must be followed by an indented line, or (only
                     when singleline is true) by nothing or by another item.
                     """
                     if not itemre.match(lines[pos]):
                         return False
                     following = ''
                     if pos + 1 < len(lines):
                         following = lines[pos + 1]
                     if not singleline:
                         return following.startswith(' ')
                     return (following == '' or following[0] == ' '
                             or itemre.match(following))

                 pos = 0
                 while pos < len(blocks):
                     block = blocks[pos]
                     if block['type'] == 'paragraph':
                         lines = block['lines']
                         for listtype, itemre, singleline in listtypes:
                             if not startsitem(lines, 0, itemre, singleline):
                                 continue
                             items = []
                             for n, line in enumerate(lines):
                                 if startsitem(lines, n, itemre, singleline):
                                     items.append({'type': listtype,
                                                   'lines': [],
                                                   'indent': block['indent']})
                                 # Lines that do not start an item continue the
                                 # most recent one.
                                 items[-1]['lines'].append(line)
                             blocks[pos:pos + 1] = items
                             break
                     pos += 1
                 return blocks
             # Width, in columns, that formatblock reserves for the key of a field
             # list item; it is also the hanging indent of the field text unless
             # all keys of the list are short enough for a narrower column.
             _fieldwidth = 12
             def updatefieldlists(blocks):
                 """Extract the keys of field list items and record the key width.

                 For every run of adjacent 'field' blocks, the ":key:" prefix is
                 removed from the first line of each block and stored in its 'key'
                 field. Every block of the run then gets a 'keywidth' field holding
                 the length of the longest key in that run. The blocks are modified
                 in place and the list is returned.
                 """
                 total = len(blocks)
                 start = 0
                 while start < total:
                     if blocks[start]['type'] != 'field':
                         start += 1
                         continue

                     # Walk to the end of this run of field blocks.
                     widest = 0
                     end = start
                     while end < total and blocks[end]['type'] == 'field':
                         field = blocks[end]
                         key, rest = _fieldre.match(field['lines'][0]).groups()
                         field['lines'][0] = rest
                         field['key'] = key
                         if len(key) > widest:
                             widest = len(key)
                         end += 1

                     for field in blocks[start:end]:
                         field['keywidth'] = widest

                     # blocks[end] (if any) is known not to be a field.
                     start = end + 1
                 return blocks
             def prunecontainers(blocks, keep):
                 """Prune unwanted containers.

                 The blocks must have a 'type' field, i.e., they should have been
                 run through findliteralblocks first.

                 A container is a paragraph whose first line starts with
                 ".. container::" together with the more deeply indented blocks
                 that follow it. The directive block itself is always removed. If
                 the container type is listed in keep, the nested blocks stay and
                 are de-indented; otherwise they are removed as well.

                 blocks is modified in place. Returns a tuple (blocks, pruned)
                 where pruned lists the types of the containers that were removed,
                 in the order they were encountered.
                 """
                 pruned = []
                 i = 0
                 while i + 1 < len(blocks):
                     # Searching for a block that looks like this:
                     #
                     # +-------+---------------------------+
                     # | ".. container ::" type            |
                     # +---+                               |
                     #     | blocks                        |
                     #     +-------------------------------+
                     if not (blocks[i]['type'] == 'paragraph' and
                             blocks[i]['lines'][0].startswith('.. container::')):
                         i += 1
                         continue

                     indent = blocks[i]['indent']
                     adjustment = blocks[i + 1]['indent'] - indent
                     # Skip the directive and the space after it.
                     containertype = blocks[i]['lines'][0][15:]
                     prune = containertype not in keep
                     if prune:
                         pruned.append(containertype)

                     # Always delete "..container:: type" block
                     del blocks[i]
                     j = i
                     while j < len(blocks) and blocks[j]['indent'] > indent:
                         if prune:
                             del blocks[j]
                         else:
                             blocks[j]['indent'] -= adjustment
                             j += 1

                     # Deliberately do not advance i: blocks[i] is now either the
                     # first kept child (which may itself be a container) or the
                     # block that followed the pruned container. Moving i backwards
                     # once per pruned block, as was done before, could make it
                     # negative and address blocks from the end of the list.
                 return blocks, pruned
             # A section underline: one punctuation character repeated at least
             # twice and nothing else on the line.
             _sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")

             def findsections(blocks):
                 """Turn underlined titles into blocks of type 'section'.

                 The blocks must have a 'type' field, i.e., they should have been
                 run through findliteralblocks first.

                 A section is a two-line paragraph of this shape, where the second
                 line is an underline exactly as long as the title:

                   +------------------------------+
                   | Section title                |
                   | -------------                |
                   +------------------------------+

                 Such a block gets the type 'section', its underline character is
                 stored in the 'underline' field and the underline itself is
                 removed from 'lines'. The blocks are modified in place and the
                 list is returned.
                 """
                 for candidate in blocks:
                     if candidate['type'] != 'paragraph':
                         continue
                     lines = candidate['lines']
                     if len(lines) != 2:
                         continue
                     title, rule = lines
                     if len(title) != len(rule) or not _sectionre.match(rule):
                         continue
                     candidate['underline'] = rule[0]
                     candidate['type'] = 'section'
                     del lines[1]
                 return blocks
             def inlineliterals(blocks):
+                substs = [('``', '"')]
                 for b in blocks:
                     if b['type'] in ('paragraph', 'section'):
-                        b['lines'] = [l.replace('``', '"') for l in b['lines']]
+                        b['lines'] = [replace(l, substs) for l in b['lines']]
                 return blocks
             def hgrole(blocks):
+                substs = [(':hg:`', '"hg '), ('`', '"')]
                 for b in blocks:
                     if b['type'] in ('paragraph', 'section'):
                         # Turn :hg:`command` into "hg command". This also works
                         # when there is a line break in the command and relies on
                         # the fact that we have no stray back-quotes in the input
                         # (run the blocks through inlineliterals first).
-                        b['lines'] = [l.replace(':hg:`', '"hg ').replace('`', '"')
+                        b['lines'] = [replace(l, substs) for l in b['lines']]
-                                      for l in b['lines']]
                 return blocks
             def addmargins(blocks):
                 """Insert empty 'margin' blocks to get vertical spacing.

                 Adjacent blocks of the same type are kept together without a
                 margin when that type is 'bullet', 'option' or 'field'. Between
                 all other pairs of neighbouring blocks an empty block of type
                 'margin' is inserted. The list is modified in place and returned.
                 """
                 grouped = ('bullet', 'option', 'field')
                 pos = 1
                 while pos < len(blocks):
                     kind = blocks[pos]['type']
                     if kind in grouped and kind == blocks[pos - 1]['type']:
                         pos += 1
                         continue
                     blocks.insert(pos, {'lines': [''], 'indent': 0,
                                         'type': 'margin'})
                     # Step over both the new margin and the block behind it.
                     pos += 2
                 return blocks
             def formatblock(block, width):
                 """Render one block as text that is at most width columns wide.

                 A width of zero or less selects the default of 78 columns.

                 - 'margin' blocks give an empty string
                 - 'literal' blocks are emitted line by line, unwrapped, with two
                   extra spaces of indentation
                 - 'section' blocks give the title followed by an underline of
                   the same length
                 - 'definition' blocks give the term on a line of its own followed
                   by the wrapped definition
                 - all other blocks are joined into one line and wrapped with
                   util.wrap, using a hanging indent for bullet, field and option
                   items

                 For bullet and field blocks the first entry of block['lines'] is
                 rewritten in place (line block marker removed, padded key
                 prepended).
                 """
                 if width <= 0:
                     width = 78
                 indent = ' ' * block['indent']
                 kind = block['type']

                 if kind == 'margin':
                     return ''

                 lines = block['lines']
                 if kind == 'literal':
                     literalindent = indent + '  '
                     return literalindent + ('\n' + literalindent).join(lines)

                 if kind == 'section':
                     title = lines[0]
                     rule = len(title) * block['underline']
                     return "%s%s\n%s%s" % (indent, title, indent, rule)

                 if kind == 'definition':
                     # The definition is indented like the last line of the block.
                     lastline = lines[-1]
                     hang = len(lastline) - len(lastline.lstrip())
                     defindent = indent + hang * ' '
                     body = ' '.join(map(str.strip, lines[1:]))
                     wrapped = util.wrap(body, width=width,
                                         initindent=defindent,
                                         hangindent=defindent)
                     return '%s\n%s' % (indent + lines[0], wrapped)

                 subindent = indent
                 if kind == 'bullet':
                     if lines[0].startswith('| '):
                         # Remove bullet for line blocks and add no extra
                         # indention.
                         lines[0] = lines[0][2:]
                     else:
                         # Continuation lines line up behind the list marker.
                         markerwidth = _bulletre.match(lines[0]).end()
                         subindent = indent + markerwidth * ' '
                 elif kind == 'field':
                     keywidth = block['keywidth']
                     key = block['key']
                     if len(key) + 2 > _fieldwidth:
                         # key too large, use full line width
                         padded = key.ljust(width)
                         subindent = indent + _fieldwidth * ' '
                     elif keywidth + 2 < _fieldwidth:
                         # all keys are small, add only two spaces
                         padded = key.ljust(keywidth + 2)
                         subindent = indent + (keywidth + 2) * ' '
                     else:
                         # mixed sizes, use fieldwidth for this one
                         padded = key.ljust(_fieldwidth)
                         subindent = indent + _fieldwidth * ' '
                     lines[0] = padded + lines[0]
                 elif kind == 'option':
                     # The description hangs behind the option and its argument.
                     option, arg = _optionre.match(lines[0]).groups()[:2]
                     subindent = indent + (len(option) + len(arg)) * ' '

                 joined = ' '.join(map(str.strip, lines))
                 return util.wrap(joined, width=width,
                                  initindent=indent,
                                  hangindent=subindent)
             def format(text, width, indent=0, keep=None):
                 """Parse text and format it to be at most width columns wide.

                 indent is added to the indentation of every block. keep lists the
                 container types that prunecontainers should retain.

                 Returns the formatted text. If keep is given (not None), a tuple
                 (text, pruned) is returned instead, where pruned is the list of
                 container types reported by prunecontainers.
                 """
                 blocks = findblocks(text)
                 for block in blocks:
                     block['indent'] += indent

                 blocks = findliteralblocks(blocks)
                 blocks, pruned = prunecontainers(blocks, keep or [])

                 # The remaining passes each take and return the block list.
                 passes = (findsections, inlineliterals, hgrole, splitparagraphs,
                           updatefieldlists, addmargins)
                 for transform in passes:
                     blocks = transform(blocks)

                 formatted = '\n'.join([formatblock(block, width)
                                        for block in blocks])
                 if keep is not None:
                     return formatted, pruned
                 return formatted
             if __name__ == "__main__":
                 # Debugging aid: parse the file named by the first argument and
                 # dump the block list after every pass. Any further arguments are
                 # the container types to keep. The result is formatted 30 columns
                 # wide.
                 from pprint import pprint

                 def debug(func, *args):
                     """Call func(*args), pretty-print what it returns and return it."""
                     blocks = func(*args)
                     # Single-argument print(...) gives the same output whether
                     # print is a statement or a function.
                     print("*** after %s:" % func.__name__)
                     pprint(blocks)
                     print("")
                     return blocks

                 fp = open(sys.argv[1])
                 try:
                     text = fp.read()
                 finally:
                     # Do not leave the input file open while the passes run.
                     fp.close()

                 # Run the passes in the same order as format() does, including
                 # hgrole, so that the dump reflects what format() produces.
                 blocks = debug(findblocks, text)
                 blocks = debug(findliteralblocks, blocks)
                 blocks, pruned = debug(prunecontainers, blocks, sys.argv[2:])
                 blocks = debug(findsections, blocks)
                 blocks = debug(inlineliterals, blocks)
                 blocks = debug(hgrole, blocks)
                 blocks = debug(splitparagraphs, blocks)
                 blocks = debug(updatefieldlists, blocks)
                 blocks = debug(addmargins, blocks)
                 print('\n'.join(formatblock(b, 30) for b in blocks))