##// END OF EJS Templates
minirst: don't choke on empty text
Matt Mackall -
r15123:9b41ccb2 default
parent child Browse files
Show More
@@ -1,561 +1,562 b''
1 1 # minirst.py - minimal reStructuredText parser
2 2 #
3 3 # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """simplified reStructuredText parser.
9 9
10 10 This parser knows just enough about reStructuredText to parse the
11 11 Mercurial docstrings.
12 12
13 13 It cheats in a major way: nested blocks are not really nested. They
14 14 are just indented blocks that look like they are nested. This relies
15 15 on the user to keep the right indentation for the blocks.
16 16
17 17 Remember to update http://mercurial.selenic.com/wiki/HelpStyleGuide
18 18 when adding support for new constructs.
19 19 """
20 20
21 21 import re
22 22 import util, encoding
23 23 from i18n import _
24 24
def replace(text, substs):
    """Apply every (old, new) pair in substs to text, in order.

    Each substitution works on the result of the previous one, so the
    order of the pairs matters. Returns the rewritten text.
    """
    for old, new in substs:
        text = text.replace(old, new)
    return text
29 29
# A newline followed by one or more blank (whitespace-only) lines
# separates two blocks.
_blockre = re.compile(r"\n(?:\s*\n)+")

def findblocks(text):
    """Split text into blocks of consecutive non-blank lines.

    Returns a list of dictionaries, one per block. Each has an
    'indent' field (the smallest leading-whitespace width found among
    the block's lines, which is stripped from every line) and a
    'lines' field. Text without any content yields an empty list.
    """
    blocks = []
    stripped = text.lstrip('\n').rstrip()
    for chunk in _blockre.split(stripped):
        rawlines = chunk.splitlines()
        if not rawlines:
            # Empty text gives one empty chunk; min() below cannot
            # work on an empty sequence, so skip it.
            continue
        indent = min(len(l) - len(l.lstrip()) for l in rawlines)
        blocks.append(dict(indent=indent,
                           lines=[l[indent:] for l in rawlines]))
    return blocks
45 46
def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'.

    A paragraph whose last line ends with "::" introduces the literal
    blocks: every directly following block that is indented deeper
    than the introducing paragraph is marked 'literal'. The "::"
    marker itself is removed or shortened. The list is modified in
    place and also returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):
            indent = blocks[i]['indent']
            # How much deeper the first following block is indented;
            # subtracted from the literal blocks' indent below.
            adjustment = blocks[i + 1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block
                del blocks[i]
                # Step back so that blocks[i + 1] is the block that
                # followed the removed "::" paragraph.
                i -= 1
            elif blocks[i]['lines'][-1].endswith(' ::'):
                # Partially minimized form: remove space and both
                # colons.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
            else:
                # Fully minimized form: remove just one colon.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            # NOTE(review): after the expanded-form deletion above,
            # blocks[i] is the block *before* the removed "::" block,
            # and for i == -1 it is the last block of the list --
            # confirm that matching the bullet against it is intended.
            m = _bulletre.match(blocks[i]['lines'][0])
            if m:
                indent += m.end()
                adjustment -= m.end()

            # Mark the following indented blocks.
            while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                blocks[i + 1]['type'] = 'literal'
                blocks[i + 1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks
96 97
# Start of a bullet item: "-", an alphanumeric label followed by "."
# or wrapped as "label)" / "(label)", or the line-block marker "|",
# each followed by a space.
_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')
# Option list entry: optional "-x, " short form, a "--long" option,
# then the remaining text split at its last run of spaces into
# group 5 and group 6.
_optionre = re.compile(r'^(-([a-zA-Z0-9]), )?(--[a-z0-9-]+)'
                       r'((.*) +)(.*)$')
# Field list entry ":key: value"; the key may not start with ":" or a
# space and may not end with a space.
_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')
# Definition term: any line that starts with a non-space character.
_definitionre = re.compile(r'[^ ]')
# Table divider: runs of "=" separated by whitespace.
_tablere = re.compile(r'(=+\s+)*=+')
103 104
def splitparagraphs(blocks):
    """Break list-like paragraphs into one block per list item.

    Every block of type 'paragraph' whose first line starts a bullet,
    option, field or definition item is replaced, in place, by one
    block per item. The new blocks carry the list type and the
    indentation of the paragraph they came from. Returns blocks.
    """
    # Tuples with (list type, item regexp, single line items?). Order
    # matters: the definition regexp is the least specific one and
    # must be tried last.
    candidates = (('bullet', _bulletre, True),
                  ('option', _optionre, True),
                  ('field', _fieldre, True),
                  ('definition', _definitionre, False))

    def isitem(lines, pos, itemre, singleline):
        """Does itemre match an item starting at lines[pos]?

        The item line must be followed by an indented line. When
        singleline is True it may instead be the last line or be
        followed by another item.
        """
        if not itemre.match(lines[pos]):
            return False
        following = lines[pos + 1] if pos + 1 < len(lines) else ''
        if not singleline:
            return following.startswith(' ')
        return (following == '' or following[0] == ' '
                or itemre.match(following))

    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        if block['type'] == 'paragraph':
            lines = block['lines']
            for listtype, itemre, singleline in candidates:
                if not isitem(lines, 0, itemre, singleline):
                    continue
                items = []
                for n, line in enumerate(lines):
                    if isitem(lines, n, itemre, singleline):
                        # A new item starts here; lines that do not
                        # start an item are appended to the current one.
                        items.append(dict(type=listtype, lines=[],
                                          indent=block['indent']))
                    items[-1]['lines'].append(line)
                blocks[pos:pos + 1] = items
                break
        pos += 1
    return blocks
145 146
# Column width that formatblock reserves for the key of a field list
# entry.
_fieldwidth = 12
147 148
def updatefieldlists(blocks):
    """Extract field keys and record the widest key of each field list.

    For every run of consecutive 'field' blocks, the ":key:" prefix is
    removed from the first line and stored under 'key', and all blocks
    of the run get the same 'keywidth' (the length of the longest key
    in that run). Blocks are modified in place and returned.
    """
    start = 0
    while start < len(blocks):
        if blocks[start]['type'] != 'field':
            start += 1
            continue

        widest = 0
        end = start
        while end < len(blocks) and blocks[end]['type'] == 'field':
            field = blocks[end]
            key, rest = _fieldre.match(field['lines'][0]).groups()
            field['lines'][0] = rest
            field['key'] = key
            widest = max(widest, len(key))
            end += 1

        for field in blocks[start:end]:
            field['keywidth'] = widest
        # blocks[end], if any, is known not to be a field.
        start = end + 1

    return blocks
171 172
def updateoptionlists(blocks):
    """Build the option string of each option block and record widths.

    For every run of consecutive 'option' blocks, the first line is
    reduced to the description, the rendered option text is stored
    under 'optstr', and all blocks of the run get the same
    'optstrwidth' (the widest 'optstr' of the run, measured with
    encoding.colwidth). Blocks are modified in place and returned.
    """
    start = 0
    while start < len(blocks):
        if blocks[start]['type'] != 'option':
            start += 1
            continue

        widest = 0
        end = start
        while end < len(blocks) and blocks[end]['type'] == 'option':
            option = blocks[end]
            m = _optionre.match(option['lines'][0])

            shortoption = m.group(2)
            longoption = m.group(3)[2:].strip()
            desc = m.group(6).strip()
            longoptionarg = m.group(5).strip()
            option['lines'][0] = desc

            if shortoption:
                shortpart = "-%s " % shortoption
                noshortop = ''
            else:
                # No short form: pad instead.
                shortpart = ''
                noshortop = ' '

            longpart = "%s--%s %s" % (noshortop, longoption, longoptionarg)
            optstr = ("%s%s" % (shortpart, longpart)).rstrip()
            option['optstr'] = optstr
            widest = max(widest, encoding.colwidth(optstr))
            end += 1

        for option in blocks[start:end]:
            option['optstrwidth'] = widest
        # blocks[end], if any, is known not to be an option.
        start = end + 1
    return blocks
207 208
def prunecontainers(blocks, keep):
    """Prune unwanted containers.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A ".. container:: type" paragraph is always removed. The deeper
    indented blocks that follow it are removed as well when the
    container type is not listed in keep; otherwise they are kept and
    their indentation is reduced.

    Returns a (blocks, pruned) tuple where pruned lists the container
    types whose content was removed.
    """
    pruned = []
    i = 0
    # A directive in the very last block has no content, so the loop
    # stops one block early.
    while i + 1 < len(blocks):
        # Searching for a block that looks like this:
        #
        # +-------+---------------------------+
        # | ".. container ::" type            |
        # +---+                               |
        #     | blocks                        |
        #     +-------------------------------+
        if (blocks[i]['type'] == 'paragraph' and
            blocks[i]['lines'][0].startswith('.. container::')):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent
            # Text after the 15 characters of ".. container:: ".
            containertype = blocks[i]['lines'][0][15:]
            prune = containertype not in keep
            if prune:
                pruned.append(containertype)

            # Always delete "..container:: type" block
            del blocks[i]
            j = i
            # Compensates the "i += 1" below so that the block which
            # moved into position i is examined next.
            i -= 1
            while j < len(blocks) and blocks[j]['indent'] > indent:
                if prune:
                    # Deleting shifts the next block into position j,
                    # so j is not advanced here.
                    del blocks[j]
                else:
                    blocks[j]['indent'] -= adjustment
                    j += 1
        i += 1
    return blocks, pruned
245 246
# Section underline: one punctuation character from the set below,
# repeated at least twice and filling the whole line.
_sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")
247 248
def findtables(blocks):
    '''Find simple tables

    Only simple one-line table elements are supported.

    A paragraph is turned into a block of type 'table' when its first
    and last lines are the same divider line (runs of "=") and there
    is at least one data row in between. The block then gets a 'table'
    field (a list of rows, each a list of stripped cell strings) and a
    'header' field (True when a divider line also appears between the
    rows). Blocks are modified in place and returned.
    '''

    for block in blocks:
        # Searching for a block that looks like this:
        #
        # === ==== ===
        #  A    B   C
        # === ==== ===  <- optional
        #  1    2   3
        #  x    y   z
        # === ==== ===
        if block['type'] != 'paragraph':
            continue
        lines = block['lines']
        # Three lines (divider, one row, divider) is the smallest
        # table; maketable() produces exactly that for a single row.
        if (len(lines) <= 2 or
            not _tablere.match(lines[0]) or
            lines[0] != lines[-1]):
            continue

        div = lines[0]
        # A column starts wherever a run of "=" begins.
        columns = [pos for pos, ch in enumerate(div)
                   if ch == '=' and (pos == 0 or div[pos - 1] == ' ')]
        # (start, end) slice bounds per column; the last column runs
        # to the end of the line.
        bounds = list(zip(columns, columns[1:] + [None]))

        header = False
        rows = []
        for l in lines[1:-1]:
            if l == div:
                header = True
                continue
            rows.append([l[start:end].strip() for start, end in bounds])

        if not rows:
            # Only divider lines: there is nothing to lay out, and
            # formatblock cannot compute column widths without rows.
            continue

        block['type'] = 'table'
        block['header'] = header
        block['table'] = rows

    return blocks
287 288
def findsections(blocks):
    """Finds sections.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A two-line paragraph whose second line is an underline as wide as
    the title (measured with encoding.colwidth) becomes a block of
    type 'section'. The underline character is stored under
    'underline' and the underline line itself is dropped.
    """
    for block in blocks:
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | Section title                |
        # | -------------                |
        # +------------------------------+
        if block['type'] != 'paragraph':
            continue
        lines = block['lines']
        if len(lines) != 2:
            continue
        title, rule = lines
        if (encoding.colwidth(title) == len(rule) and
            _sectionre.match(rule)):
            block['underline'] = rule[0]
            block['type'] = 'section'
            del lines[1]
    return blocks
309 310
def inlineliterals(blocks):
    """Replace inline literal markers (``) with double quotes.

    Only blocks of type 'paragraph' and 'section' are touched; their
    'lines' field is replaced by a new list. Returns blocks.
    """
    for block in blocks:
        if block['type'] not in ('paragraph', 'section'):
            continue
        block['lines'] = [line.replace('``', '"')
                          for line in block['lines']]
    return blocks
316 317
def hgrole(blocks):
    """Turn :hg:`command` into "hg command".

    Only blocks of type 'paragraph' and 'section' are touched; their
    'lines' field is replaced by a new list. Returns blocks.
    """
    for block in blocks:
        if block['type'] not in ('paragraph', 'section'):
            continue
        # The opening marker is rewritten first and every remaining
        # back-quote then becomes a closing quote. This also works
        # when the command is broken across lines, and relies on the
        # input having no stray back-quotes (run the blocks through
        # inlineliterals first).
        block['lines'] = [line.replace(':hg:`', '"hg ').replace('`', '"')
                          for line in block['lines']]
    return blocks
327 328
def addmargins(blocks):
    """Adds empty blocks for vertical spacing.

    Consecutive bullet, option or field blocks of the same type are
    kept together with no vertical space between them; an empty
    'margin' block is inserted between all other neighbouring blocks.
    The list is modified in place and returned.
    """
    grouped = ('bullet', 'option', 'field')
    pos = 1
    while pos < len(blocks):
        kind = blocks[pos]['type']
        if kind in grouped and kind == blocks[pos - 1]['type']:
            pos += 1
            continue
        blocks.insert(pos, dict(lines=[''], indent=0, type='margin'))
        # Skip both the new margin and the block it was put before.
        pos += 2
    return blocks
343 344
def prunecomments(blocks):
    """Remove comments.

    A comment is a paragraph whose first line starts with ".. " or
    that consists of the single line "..". The margin block directly
    after a removed comment is removed too. The list is modified in
    place and returned.
    """
    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        iscomment = (block['type'] == 'paragraph' and
                     (block['lines'][0].startswith('.. ') or
                      block['lines'] == ['..']))
        if not iscomment:
            pos += 1
            continue
        # Deleting shifts the next block into this position, so pos
        # stays where it is.
        del blocks[pos]
        if pos < len(blocks) and blocks[pos]['type'] == 'margin':
            del blocks[pos]
    return blocks
357 358
# An admonition directive at the start of a line, e.g. ".. note::".
_admonitionre = re.compile(r"\.\. (admonition|attention|caution|danger|"
                           r"error|hint|important|note|tip|warning)::",
                           flags=re.IGNORECASE)

def findadmonitions(blocks):
    """
    Makes the type of the block an admonition block if
    the first line is an admonition directive

    The lower-cased directive name is stored under 'admonitiontitle'.
    The directive line is removed; any text that followed the
    directive on that line is kept as the new first line.
    """
    for block in blocks:
        lines = block['lines']
        directive = lines[0]
        m = _admonitionre.match(directive)
        if not m:
            continue

        block['type'] = 'admonition'
        # The name sits between the leading ".. " and the final "::".
        title = directive[3:m.end() - 2].lower()

        # Text after the directive, skipping one separator character.
        firstline = directive[m.end() + 1:]
        if firstline:
            lines.insert(1, ' ' + firstline)

        block['admonitiontitle'] = title
        del lines[0]
    return blocks
382 383
# Translated heading printed by formatblock for each admonition type,
# keyed by the lower-cased directive name.
# NOTE(review): _admonitionre also accepts "admonition", which has no
# entry here -- confirm how such a block is meant to be rendered.
_admonitiontitles = {'attention': _('Attention:'),
                     'caution': _('Caution:'),
                     'danger': _('!Danger!') ,
                     'error': _('Error:'),
                     'hint': _('Hint:'),
                     'important': _('Important:'),
                     'note': _('Note:'),
                     'tip': _('Tip:'),
                     'warning': _('Warning!')}
392 393
def formatoption(block, width):
    """Format an option block: option text followed by its description.

    The option string is padded to the block's 'optstrwidth' and the
    description is wrapped with util.wrap to one column less than
    width, with continuation lines indented under the description.
    """
    desc = ' '.join(map(str.strip, block['lines']))
    optstr = block['optstr']
    padding = ' ' * (block['optstrwidth'] - encoding.colwidth(optstr))
    initindent = '%s%s ' % (optstr, padding)
    # One extra column accounts for the space prepended to the result.
    hangindent = ' ' * (encoding.colwidth(initindent) + 1)
    wrapped = util.wrap(desc, width - 1,
                        initindent=initindent,
                        hangindent=hangindent)
    return ' %s' % wrapped
403 404
def formatblock(block, width):
    """Format a block according to width.

    A non-positive width falls back to 78 columns. The layout depends
    on the block's 'type'; text is wrapped with util.wrap. Returns the
    formatted text.

    Bullet and field blocks have their first line rewritten in place
    (the "| " marker is stripped, the padded field key is prepended).
    """
    if width <= 0:
        width = 78
    indent = ' ' * block['indent']
    kind = block['type']

    if kind == 'admonition':
        title = _admonitiontitles[block['admonitiontitle']]
        lastline = block['lines'][-1]
        # Body indentation follows that of the last line.
        defindent = indent + (len(lastline) - len(lastline.lstrip())) * ' '
        text = ' '.join(map(str.strip, block['lines']))
        return '%s\n%s' % (indent + title,
                           util.wrap(text, width=width,
                                     initindent=defindent,
                                     hangindent=defindent))

    if kind == 'margin':
        return ''

    if kind == 'literal':
        # Literal blocks are emitted verbatim, one column deeper.
        indent += ' '
        return indent + ('\n' + indent).join(block['lines'])

    if kind == 'section':
        title = block['lines'][0]
        underline = encoding.colwidth(title) * block['underline']
        return "%s%s\n%s%s" % (indent, title, indent, underline)

    if kind == 'table':
        table = block['table']
        # compute column widths
        widths = [max([encoding.colwidth(e) for e in c])
                  for c in zip(*table)]
        span = sum(widths) + len(widths) - 1
        # Wrapped text continues under the last column.
        hang = ' ' * (len(indent) + span - widths[-1])
        rowfmt = ' '.join('%%-%ds' % n for n in widths)

        pieces = []
        for rownum, row in enumerate(table):
            l = util.wrap(rowfmt % tuple(row), width=width,
                          initindent=indent, hangindent=hang)
            pieces.append(l + '\n')
            if rownum == 0 and block['header']:
                # Rule between the header row and the data rows.
                pieces.append(indent + '-' * (min(width, span)) + '\n')
        return ''.join(pieces)

    if kind == 'definition':
        lines = block['lines']
        term = indent + lines[0]
        lastline = lines[-1]
        defindent = indent + (len(lastline) - len(lastline.lstrip())) * ' '
        text = ' '.join(map(str.strip, lines[1:]))
        return '%s\n%s' % (term, util.wrap(text, width=width,
                                           initindent=defindent,
                                           hangindent=defindent))

    if kind == 'option':
        return formatoption(block, width)

    subindent = indent
    if kind == 'bullet':
        firstline = block['lines'][0]
        if firstline.startswith('| '):
            # Remove bullet for line blocks and add no extra
            # indention.
            block['lines'][0] = firstline[2:]
        else:
            # Continuation lines line up behind the bullet marker.
            m = _bulletre.match(firstline)
            subindent = indent + m.end() * ' '
    elif kind == 'field':
        keywidth = block['keywidth']
        key = block['key']

        if len(key) + 2 > _fieldwidth:
            # key too large, use full line width
            key = key.ljust(width)
            subindent = indent + _fieldwidth * ' '
        elif keywidth + 2 < _fieldwidth:
            # all keys are small, add only two spaces
            key = key.ljust(keywidth + 2)
            subindent = indent + (keywidth + 2) * ' '
        else:
            # mixed sizes, use fieldwidth for this one
            key = key.ljust(_fieldwidth)
            subindent = indent + _fieldwidth * ' '
        block['lines'][0] = key + block['lines'][0]

    text = ' '.join(map(str.strip, block['lines']))
    return util.wrap(text, width=width,
                     initindent=indent,
                     hangindent=subindent)
484 485
def parse(text, indent=0, keep=None):
    """Parse text into a list of blocks

    indent is added to the indentation of every block. keep lists the
    container types whose content must be kept (none when omitted).
    Returns a (blocks, pruned) tuple where pruned holds the container
    types that were removed.
    """
    blocks = findblocks(text)
    for block in blocks:
        block['indent'] += indent

    # The passes run in a fixed order; later ones rely on the fields
    # set by earlier ones.
    for step in (findliteralblocks, findtables):
        blocks = step(blocks)
    blocks, pruned = prunecontainers(blocks, keep or [])
    for step in (findsections, inlineliterals, hgrole, splitparagraphs,
                 updatefieldlists, updateoptionlists, addmargins,
                 prunecomments, findadmonitions):
        blocks = step(blocks)
    return blocks, pruned
504 505
def formatblocks(blocks, width):
    """Format every block to width and join the results with newlines."""
    rendered = [formatblock(block, width) for block in blocks]
    return '\n'.join(rendered)
508 509
def format(text, width, indent=0, keep=None):
    """Parse and format the text according to width.

    Returns the formatted text. When keep is given (anything but
    None), returns a (text, pruned) tuple instead, where pruned lists
    the container types that were removed.
    """
    blocks, pruned = parse(text, indent, keep or [])
    formatted = formatblocks(blocks, width)
    if keep is not None:
        return formatted, pruned
    return formatted
517 518
def getsections(blocks):
    '''return a list of (section name, nesting level, blocks) tuples

    The nesting level of a section is derived from its underline
    character: the first character seen is level 1, a new character
    seen after it is one level deeper. Blocks that come before the
    first section are collected under an initial section with an
    empty name and level 0.
    '''
    # Underline characters of the currently open levels, outermost
    # first.
    nest = ""
    secs = []
    for b in blocks:
        if b['type'] != 'section':
            if not secs:
                # add an initial empty section
                secs.append(('', 0, []))
            secs[-1][2].append(b)
            continue

        mark = b['underline']
        if mark not in nest:
            nest += mark
        level = nest.index(mark) + 1
        # Returning to an outer level closes the deeper ones.
        nest = nest[:level]
        secs.append((b['lines'][0], level, [b]))
    return secs
537 538
def decorateblocks(blocks, width):
    '''generate (section name, line text) pairs for search

    Returns a list with one entry per section found by getsections;
    each entry is the list of (section name, line text) pairs for the
    lines of that section's formatted text (line endings are kept).
    '''
    decorated = []
    for name, level, secblocks in getsections(blocks):
        text = formatblocks(secblocks, width)
        decorated.append([(name, l) for l in text.splitlines(True)])
    return decorated
546 547
def maketable(data, indent=0, header=False):
    '''Generate an RST table for the given table data

    data is a sequence of rows. Each column is made as wide as its
    widest cell (measured with encoding.colwidth) and every line is
    prefixed with indent spaces. With header set and more than one
    row, a divider line is also put after the first row.
    '''

    widths = [max(encoding.colwidth(e) for e in c) for c in zip(*data)]
    pad = ' ' * indent
    rowfmt = pad + ' '.join('%%-%ds' % w for w in widths) + '\n'
    div = pad + ' '.join('=' * w for w in widths) + '\n'

    out = [div]
    out.extend(rowfmt % tuple(row) for row in data)
    if header and len(data) > 1:
        # Position 2 is right after the top divider and the first row.
        out.insert(2, div)
    out.append(div)
    return ''.join(out)
General Comments 0
You need to be logged in to leave comments. Login now