##// END OF EJS Templates
minirst: pull admonition titles out formatblock function
Martin Geisler -
r12652:3c31c0e4 default
parent child Browse files
Show More
@@ -1,437 +1,437 b''
1 1 # minirst.py - minimal reStructuredText parser
2 2 #
3 3 # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """simplified reStructuredText parser.
9 9
10 10 This parser knows just enough about reStructuredText to parse the
11 11 Mercurial docstrings.
12 12
13 13 It cheats in a major way: nested blocks are not really nested. They
14 14 are just indented blocks that look like they are nested. This relies
15 15 on the user to keep the right indentation for the blocks.
16 16
17 17 It only supports a small subset of reStructuredText:
18 18
19 19 - sections
20 20
21 21 - paragraphs
22 22
23 23 - literal blocks
24 24
25 25 - definition lists
26 26
27 27 - specific admonitions
28 28
29 29 - bullet lists (items must start with '-')
30 30
31 31 - enumerated lists (no autonumbering)
32 32
33 33 - field lists (colons cannot be escaped)
34 34
35 35 - option lists (supports only long options without arguments)
36 36
37 37 - inline literals (no other inline markup is recognized)
38 38 """
39 39
40 40 import re, sys
41 41 import util, encoding
42 42 from i18n import _
43 43
44 44
def replace(text, substs):
    """Apply a sequence of (old, new) substitutions to an encoded string.

    text is decoded with encoding.encoding, every pair in substs is
    applied in order using the replace method of the decoded text, and
    the result is encoded again with the same encoding.
    """
    charset = encoding.encoding
    decoded = text.decode(charset)
    for old, new in substs:
        decoded = decoded.replace(old, new)
    return decoded.encode(charset)
50 50
51 51
_blockre = re.compile(r"\n(?:\s*\n)+")

def findblocks(text):
    """Find continuous blocks of lines in text.

    Leading and trailing whitespace is stripped from text and the rest
    is split wherever one or more blank (whitespace-only) lines occur.

    Returns a list of dictionaries representing the blocks. Each block
    has an 'indent' field (the smallest indentation found among its
    lines) and a 'lines' field (the lines with that many leading
    characters removed). Empty or whitespace-only text gives an empty
    list.
    """
    blocks = []
    for b in _blockre.split(text.strip()):
        lines = b.splitlines()
        if not lines:
            # Splitting an empty string yields one empty chunk; calling
            # min() on its (empty) list of lines would raise ValueError.
            continue
        indent = min([len(l) - len(l.lstrip()) for l in lines])
        lines = [l[indent:] for l in lines]
        blocks.append(dict(indent=indent, lines=lines))
    return blocks
67 67
68 68
def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'.

    A paragraph whose last line ends with "::" introduces the more
    deeply indented blocks that follow it as literal blocks. The "::"
    marker is removed from the paragraph (a paragraph consisting of
    just "::" is deleted) and the indentation of the literal blocks is
    reduced by the distance between the paragraph and the first block
    that follows it.

    The list is modified in place and also returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block. After this, i refers to
                # the block before the removed one (or is -1 when the
                # removed block was the first one).
                del blocks[i]
                i -= 1
            elif blocks[i]['lines'][-1].endswith(' ::'):
                # Partially minimized form: remove space and both
                # colons.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
            else:
                # Fully minimized form: remove just one colon.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            #
            # When i is -1 there is no block in front of the literal
            # blocks; blocks[-1] would be the *last* block of the
            # document, so the correction must be skipped.
            if i >= 0:
                m = _bulletre.match(blocks[i]['lines'][0])
                if m:
                    indent += m.end()
                    adjustment -= m.end()

            # Mark the following indented blocks.
            while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                blocks[i + 1]['type'] = 'literal'
                blocks[i + 1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks

_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')
_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')
_definitionre = re.compile(r'[^ ]')

def splitparagraphs(blocks):
    """Split paragraphs that are really lists into one block per item.

    Every block of type 'paragraph' whose first line starts a list item
    is replaced, in place, by one block per item. The new blocks carry
    the list type ('bullet', 'option', 'field' or 'definition'), the
    indentation of the original paragraph and the lines of the item.
    Returns the (modified) list of blocks.
    """
    # Triples of (list type, item regexp, single line items?). The
    # order matters: the regexp for definition lists is the least
    # specific one and therefore has to be tried last.
    listtypes = [('bullet', _bulletre, True),
                 ('option', _optionre, True),
                 ('field', _fieldre, True),
                 ('definition', _definitionre, False)]

    def startsitem(lines, pos, itemre, singleline):
        """Does itemre match an item at line pos?

        A list item can be followed by an indented line or another
        list item (but only if singleline is True).
        """
        if not itemre.match(lines[pos]):
            return False
        following = ''
        if pos + 1 < len(lines):
            following = lines[pos + 1]
        if not singleline:
            return following.startswith(' ')
        if following == '' or following[0] == ' ':
            return True
        return bool(itemre.match(following))

    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        if block['type'] == 'paragraph':
            lines = block['lines']
            for listtype, itemre, singleline in listtypes:
                if not startsitem(lines, 0, itemre, singleline):
                    continue
                # The first line always opens an item here, so
                # items[-1] exists before any continuation line is
                # appended.
                items = []
                for n, line in enumerate(lines):
                    if startsitem(lines, n, itemre, singleline):
                        items.append(dict(type=listtype, lines=[],
                                          indent=block['indent']))
                    items[-1]['lines'].append(line)
                blocks[pos:pos + 1] = items
                break
        pos += 1
    return blocks
166 166
167 167
_fieldwidth = 12

def updatefieldlists(blocks):
    """Find key and maximum key width for field lists.

    For every run of consecutive 'field' blocks the key is split off
    the first line of each block and stored under 'key', and the length
    of the longest key in the run is stored under 'keywidth' on every
    block of that run. Blocks are updated in place and the list is
    returned.
    """
    total = len(blocks)
    start = 0
    while start < total:
        if blocks[start]['type'] != 'field':
            start += 1
            continue

        run = []
        widest = 0
        end = start
        while end < total and blocks[end]['type'] == 'field':
            field = blocks[end]
            key, rest = _fieldre.match(field['lines'][0]).groups()
            field['lines'][0] = rest
            field['key'] = key
            widest = max(widest, len(key))
            run.append(field)
            end += 1

        for field in run:
            field['keywidth'] = widest
        # blocks[end] (if any) is known not to be a field, skip it.
        start = end + 1

    return blocks
193 193
194 194
def prunecontainers(blocks, keep):
    """Prune unwanted containers.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A container is a paragraph whose first line starts with
    ".. container::", followed by the more deeply indented blocks that
    make up its content. The directive block itself is always removed.
    If the container type (the text after the directive) is listed in
    keep, the content is kept and its indentation is reduced by the
    distance between the directive and the first block after it;
    otherwise the content is removed as well.

    Returns a tuple (blocks, pruned) where blocks is the list that was
    passed in (modified in place) and pruned lists the types of the
    containers whose content was removed. A directive in the very last
    block is left untouched, since there is no block after it.
    """
    pruned = []
    i = 0
    while i + 1 < len(blocks):
        # Searching for a block that looks like this:
        #
        # +-------+---------------------------+
        # | ".. container ::" type            |
        # +---+                               |
        #     | blocks                        |
        #     +-------------------------------+
        if (blocks[i]['type'] == 'paragraph' and
            blocks[i]['lines'][0].startswith('.. container::')):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent
            containertype = blocks[i]['lines'][0][15:]
            prune = containertype not in keep
            if prune:
                pruned.append(containertype)

            # Always delete the ".. container:: type" block itself.
            del blocks[i]
            j = i
            while j < len(blocks) and blocks[j]['indent'] > indent:
                if prune:
                    # The next block moves into position j.
                    del blocks[j]
                else:
                    blocks[j]['indent'] -= adjustment
                    j += 1

            # Whatever now sits at position i has not been examined
            # yet (it may itself be a container directive), so look at
            # the same position again instead of advancing. Deleting
            # blocks at or after i never shifts earlier blocks, so no
            # further index adjustment is needed.
            continue
        i += 1
    return blocks, pruned
232 232
233 233
_sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")

def findsections(blocks):
    """Turn underlined two-line paragraphs into section blocks.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A section is a paragraph of exactly two lines of equal length whose
    second line matches _sectionre:

      Section title
      -------------

    Such a block gets the type 'section', the underline character is
    stored in its 'underline' field and the underline is removed from
    its lines. Returns the list of blocks.
    """
    for block in blocks:
        if block['type'] != 'paragraph':
            continue
        lines = block['lines']
        if len(lines) != 2:
            continue
        title, underline = lines
        if len(title) == len(underline) and _sectionre.match(underline):
            block['underline'] = underline[0]
            block['type'] = 'section'
            del lines[1]
    return blocks
257 257
258 258
def inlineliterals(blocks):
    """Replace the `` inline literal marker with a double quote.

    Only the lines of 'paragraph' and 'section' blocks are rewritten;
    all other blocks are left alone. Returns the list of blocks.
    """
    substs = [('``', '"')]
    for b in blocks:
        if b['type'] not in ('paragraph', 'section'):
            continue
        b['lines'] = [replace(l, substs) for l in b['lines']]
    return blocks
265 265
266 266
def hgrole(blocks):
    """Rewrite the :hg: role in paragraph and section blocks.

    Blocks of any other type are left alone. Returns the list of
    blocks.
    """
    # Turn :hg:`command` into "hg command". This also works when there
    # is a line break in the command and relies on the fact that we
    # have no stray back-quotes in the input (run the blocks through
    # inlineliterals first).
    substs = [(':hg:`', '"hg '), ('`', '"')]
    for b in blocks:
        if b['type'] not in ('paragraph', 'section'):
            continue
        b['lines'] = [replace(l, substs) for l in b['lines']]
    return blocks
277 277
278 278
def addmargins(blocks):
    """Adds empty blocks for vertical spacing.

    Neighbouring blocks that share the same type and are bullets,
    options or fields are kept together with no vertical space between
    them. Between every other pair of neighbouring blocks an empty
    block of type 'margin' is inserted. The list is modified in place
    and returned.
    """
    grouped = ('bullet', 'option', 'field')
    spaced = []
    previous = None
    for block in blocks:
        if previous is not None:
            kind = block['type']
            if not (kind == previous['type'] and kind in grouped):
                spaced.append(dict(lines=[''], indent=0, type='margin'))
        spaced.append(block)
        previous = block
    blocks[:] = spaced
    return blocks
294 294
_admonitionre = re.compile(r"\.\. (admonition|attention|caution|danger|"
                           r"error|hint|important|note|tip|warning)::",
                           flags=re.IGNORECASE)

def findadmonitions(blocks):
    """
    Makes the type of the block an admonition block if
    the first line is an admonition directive

    The lowercased directive name is stored in the 'admonitiontitle'
    field of the block. The directive is removed from the block's
    lines; text following the directive on the same line is kept as the
    first line, prefixed with a space. Returns the list of blocks.
    """
    for block in blocks:
        lines = block['lines']
        directive = lines[0]
        m = _admonitionre.match(directive)
        if not m:
            continue
        block['type'] = 'admonition'
        # Skip the leading ".. " and the trailing "::".
        block['admonitiontitle'] = directive[3:m.end() - 2].lower()
        # Skip the character that separates the directive from any text
        # on the same line.
        remainder = directive[m.end() + 1:]
        if remainder:
            lines[0] = ' ' + remainder
        else:
            del lines[0]
    return blocks
319 319
# Translated headings printed above admonition blocks, keyed by the
# lowercased directive name stored in block['admonitiontitle'].
# NOTE(review): there is no entry for the generic 'admonition'
# directive although _admonitionre accepts it; formatting such a block
# raises KeyError.
_admonitiontitles = {'attention': _('Attention:'),
                     'caution': _('Caution:'),
                     'danger': _('!Danger!'),
                     'error': _('Error:'),
                     'hint': _('Hint:'),
                     'important': _('Important:'),
                     'note': _('Note:'),
                     'tip': _('Tip:'),
                     'warning': _('Warning!')}

def formatblock(block, width):
    """Format a block according to width.

    block is a dictionary with at least the fields 'type', 'indent' and
    'lines'; some types need more fields ('admonitiontitle' for
    admonitions, 'underline' for sections, 'key' and 'keywidth' for
    fields). A width of zero or less selects a width of 78.

    Returns the formatted text of the block as a string. Margin blocks
    give an empty string, literal blocks and sections are emitted
    without wrapping, everything else is wrapped with util.wrap.

    Bullet blocks that start with '| ' and field blocks have their
    first line rewritten in place as a side effect.
    """
    if width <= 0:
        width = 78
    indent = ' ' * block['indent']
    if block['type'] == 'admonition':
        admonition = _admonitiontitles[block['admonitiontitle']]
        # The body is indented like the last line of the block.
        hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())

        defindent = indent + hang * ' '
        text = ' '.join(map(str.strip, block['lines']))
        return '%s\n%s' % (indent + admonition, util.wrap(text, width=width,
                                           initindent=defindent,
                                           hangindent=defindent))
    if block['type'] == 'margin':
        return ''
    if block['type'] == 'literal':
        indent += ' '
        return indent + ('\n' + indent).join(block['lines'])
    if block['type'] == 'section':
        underline = len(block['lines'][0]) * block['underline']
        return "%s%s\n%s%s" % (indent, block['lines'][0],indent, underline)
    if block['type'] == 'definition':
        # First line is the term, the remaining lines are the
        # definition, indented like the last line of the block.
        term = indent + block['lines'][0]
        hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())
        defindent = indent + hang * ' '
        text = ' '.join(map(str.strip, block['lines'][1:]))
        return '%s\n%s' % (term, util.wrap(text, width=width,
                                           initindent=defindent,
                                           hangindent=defindent))
    subindent = indent
    if block['type'] == 'bullet':
        if block['lines'][0].startswith('| '):
            # Remove bullet for line blocks and add no extra
            # indention.
            block['lines'][0] = block['lines'][0][2:]
        else:
            # Continuation lines are aligned with the text after the
            # bullet.
            m = _bulletre.match(block['lines'][0])
            subindent = indent + m.end() * ' '
    elif block['type'] == 'field':
        keywidth = block['keywidth']
        key = block['key']

        subindent = indent + _fieldwidth * ' '
        if len(key) + 2 > _fieldwidth:
            # key too large, use full line width
            key = key.ljust(width)
        elif keywidth + 2 < _fieldwidth:
            # all keys are small, add only two spaces
            key = key.ljust(keywidth + 2)
            subindent = indent + (keywidth + 2) * ' '
        else:
            # mixed sizes, use fieldwidth for this one
            key = key.ljust(_fieldwidth)
        block['lines'][0] = key + block['lines'][0]
    elif block['type'] == 'option':
        # Continuation lines are aligned with the description, i.e.
        # after the option name and its argument/padding.
        m = _optionre.match(block['lines'][0])
        option, arg, rest = m.groups()
        subindent = indent + (len(option) + len(arg)) * ' '

    text = ' '.join(map(str.strip, block['lines']))
    return util.wrap(text, width=width,
                     initindent=indent,
                     hangindent=subindent)
394 394
395 395
def format(text, width, indent=0, keep=None):
    """Parse and format the text according to width.

    indent is added to the indentation of every block found in text.
    keep lists the container types whose content should be kept (see
    prunecontainers).

    Returns the formatted text if keep is None, otherwise a tuple of
    the formatted text and the list of pruned container types.
    """
    blocks = findblocks(text)
    for b in blocks:
        b['indent'] += indent
    blocks = findliteralblocks(blocks)
    blocks, pruned = prunecontainers(blocks, keep or [])
    # The remaining passes all take and return the list of blocks; the
    # order is significant.
    for transform in (findsections, inlineliterals, hgrole,
                      splitparagraphs, updatefieldlists, addmargins,
                      findadmonitions):
        blocks = transform(blocks)
    formatted = '\n'.join([formatblock(b, width) for b in blocks])
    if keep is not None:
        return formatted, pruned
    return formatted
415 415
416 416
# Debugging aid: run the file named by the first command line argument
# through the parser one pass at a time, dumping the blocks after each
# pass. Any further arguments are the container types to keep.
if __name__ == "__main__":
    from pprint import pprint

    def debug(func, *args):
        """Call func(*args), pretty-print what it returned, return it."""
        blocks = func(*args)
        # Single-argument print(...) behaves the same as a print
        # statement on Python 2.
        print("*** after %s:" % func.__name__)
        pprint(blocks)
        print("")
        return blocks

    fp = open(sys.argv[1])
    try:
        text = fp.read()
    finally:
        # Do not rely on garbage collection to close the input file.
        fp.close()

    # Same passes, in the same order, as format() so that the output
    # shown here is what format() would produce.
    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    blocks, pruned = debug(prunecontainers, blocks, sys.argv[2:])
    blocks = debug(findsections, blocks)
    blocks = debug(inlineliterals, blocks)
    blocks = debug(hgrole, blocks)
    blocks = debug(splitparagraphs, blocks)
    blocks = debug(updatefieldlists, blocks)
    blocks = debug(addmargins, blocks)
    blocks = debug(findadmonitions, blocks)
    print('\n'.join([formatblock(b, 30) for b in blocks]))
General Comments 0
You need to be logged in to leave comments. Login now