##// END OF EJS Templates
minirst: remove pointless transcoding
Matt Mackall -
r15121:0ad0ebe6 default
parent child Browse files
Show More
@@ -1,571 +1,569 b''
1 1 # minirst.py - minimal reStructuredText parser
2 2 #
3 3 # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """simplified reStructuredText parser.
9 9
10 10 This parser knows just enough about reStructuredText to parse the
11 11 Mercurial docstrings.
12 12
13 13 It cheats in a major way: nested blocks are not really nested. They
14 14 are just indented blocks that look like they are nested. This relies
15 15 on the user to keep the right indentation for the blocks.
16 16
17 17 Remember to update http://mercurial.selenic.com/wiki/HelpStyleGuide
18 18 when adding support for new constructs.
19 19 """
20 20
21 21 import re
22 22 import util, encoding
23 23 from i18n import _
24 24
25 25
def replace(text, substs):
    """Apply a sequence of literal substitutions to text.

    substs is an iterable of (old, new) pairs. The pairs are applied
    in order, each one operating on the result of the previous
    replacement, and the final string is returned.
    """
    result = text
    for old, new in substs:
        result = result.replace(old, new)
    return result
32 30
# A block separator: a newline followed by one or more lines that are
# empty or contain only whitespace.
_blockre = re.compile(r"\n(?:\s*\n)+")

def findblocks(text):
    """Find continuous blocks of lines in text.

    Returns a list of dictionaries representing the blocks. Each block
    has an 'indent' field (the smallest leading-whitespace width among
    its lines) and a 'lines' field (the lines with that common indent
    removed).

    Empty or whitespace-only text yields an empty list.
    """
    blocks = []
    for chunk in _blockre.split(text.lstrip('\n').rstrip()):
        lines = chunk.splitlines()
        if not lines:
            # Splitting an empty string gives one empty chunk with no
            # lines; min() over it would raise ValueError.
            continue
        indent = min(len(l) - len(l.lstrip()) for l in lines)
        blocks.append(dict(indent=indent, lines=[l[indent:] for l in lines]))
    return blocks
48 46
49 47
def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'. The list is modified in place and
    returned.
    """
    pos = 0
    while pos < len(blocks):
        # We are looking for this shape:
        #
        #    +------------------------------+
        #    | paragraph                    |
        #    | (ends with "::")             |
        #    +------------------------------+
        #       +---------------------------+
        #       | indented literal block    |
        #       +---------------------------+
        current = blocks[pos]
        current['type'] = 'paragraph'
        if current['lines'][-1].endswith('::') and pos + 1 < len(blocks):
            indent = current['indent']
            adjustment = blocks[pos + 1]['indent'] - indent

            lastline = current['lines'][-1]
            if current['lines'] == ['::']:
                # Expanded form: the marker paragraph disappears.
                # NOTE: pos now refers to the block before the marker
                # (or wraps around to the last block when the marker
                # was the first block).
                del blocks[pos]
                pos -= 1
            elif lastline.endswith(' ::'):
                # Partially minimized form: drop the space and both
                # colons.
                current['lines'][-1] = lastline[:-3]
            else:
                # Fully minimized form: drop a single colon.
                current['lines'][-1] = lastline[:-1]

            # List items are rendered with a hanging indent. Account
            # for it now, while the original indentation of the
            # following literal blocks is still known.
            bullet = _bulletre.match(blocks[pos]['lines'][0])
            if bullet:
                indent += bullet.end()
                adjustment -= bullet.end()

            # Every following block indented deeper than the
            # paragraph is part of the literal text.
            while pos + 1 < len(blocks) and blocks[pos + 1]['indent'] > indent:
                following = blocks[pos + 1]
                following['type'] = 'literal'
                following['indent'] -= adjustment
                pos += 1
        pos += 1
    return blocks
100 98
# NOTE(review): this listing looks whitespace-collapsed; confirm the
# number of spaces inside these patterns against the repository.

# Bullet marker followed by a space: "-", "1." / "a.", "(1)" / "1)",
# or "|" (line block).
_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')
# Optional short option ("-x, "), a long option ("--name"), then the
# argument text, separating spaces and the description.
_optionre = re.compile(r'^(-([a-zA-Z0-9]), )?(--[a-z0-9-]+)'
                       r'((.*) +)(.*)$')
# ":key: value" where the key neither starts with ":" or " " nor ends
# with " ".
_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')
# Any line whose first character is not a space.
_definitionre = re.compile(r'[^ ]')
# A table rule: runs of "=" separated by whitespace.
_tablere = re.compile(r'(=+\s+)*=+')

def splitparagraphs(blocks):
    """Split paragraphs into lists.

    A paragraph whose first line looks like a list item is replaced,
    in place, by one block per item. The new blocks get the list type
    ('bullet', 'option', 'field' or 'definition') and keep the indent
    of the paragraph they came from.
    """
    # (list type, item regexp, single line items?). Order matters:
    # the definition regexp is the least specific one, so it is tried
    # last.
    listtypes = [('bullet', _bulletre, True),
                 ('option', _optionre, True),
                 ('field', _fieldre, True),
                 ('definition', _definitionre, False)]

    def match(lines, i, itemre, singleline):
        """Does itemre match an item at line i?

        A list item can be followed by an indented line or another
        list item (but only if singleline is True).
        """
        if not itemre.match(lines[i]):
            return False
        following = lines[i + 1] if i + 1 < len(lines) else ''
        if not singleline:
            return following.startswith(' ')
        return (following == '' or following[0] == ' '
                or itemre.match(following))

    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        if block['type'] == 'paragraph':
            lines = block['lines']
            for listtype, itemre, singleline in listtypes:
                if not match(lines, 0, itemre, singleline):
                    continue
                items = []
                for j, line in enumerate(lines):
                    if match(lines, j, itemre, singleline):
                        # a new item starts on this line
                        items.append(dict(type=listtype, lines=[],
                                          indent=block['indent']))
                    items[-1]['lines'].append(line)
                blocks[pos:pos + 1] = items
                break
        pos += 1
    return blocks
149 147
150 148
# Default width of the key column of a field list; formatblock pads
# keys to this width when the keys of a list have mixed sizes.
_fieldwidth = 12
152 150
def updatefieldlists(blocks):
    """Find key and maximum key width for field lists.

    For every run of consecutive 'field' blocks, the ":key:" prefix
    is split off the first line and stored under 'key', and every
    block of the run gets a 'keywidth' entry holding the length of
    the longest key in that run. The blocks are updated in place.
    """
    start = 0
    while start < len(blocks):
        if blocks[start]['type'] != 'field':
            start += 1
            continue

        widest = 0
        end = start
        while end < len(blocks) and blocks[end]['type'] == 'field':
            field = blocks[end]
            key, rest = _fieldre.match(field['lines'][0]).groups()
            field['lines'][0] = rest
            field['key'] = key
            widest = max(widest, len(key))
            end += 1

        for field in blocks[start:end]:
            field['keywidth'] = widest
        # blocks[end] (if any) is known not to be a field: skip it.
        start = end + 1

    return blocks
176 174
177 175
def updateoptionlists(blocks):
    """Find the option string and maximum option width for option lists.

    For every run of consecutive 'option' blocks, the first line is
    reduced to the description, the normalized option text is stored
    under 'optstr', and every block of the run gets an 'optstrwidth'
    entry holding the widest 'optstr' (in display columns) of that
    run. The blocks are updated in place.
    """
    start = 0
    while start < len(blocks):
        if blocks[start]['type'] != 'option':
            start += 1
            continue

        widest = 0
        end = start
        while end < len(blocks) and blocks[end]['type'] == 'option':
            option = blocks[end]
            m = _optionre.match(option['lines'][0])

            shortoption = m.group(2)
            longoption = m.group(3)[2:].strip()
            longoptionarg = m.group(5).strip()
            option['lines'][0] = m.group(6).strip()

            if shortoption:
                prefix = "-%s " % shortoption
            else:
                # Filler so long-only options line up with the others.
                # NOTE(review): this listing looks whitespace-collapsed;
                # confirm the filler width against the repository.
                prefix = ' '

            opt = prefix + "--%s %s" % (longoption, longoptionarg)
            opt = opt.rstrip()
            option['optstr'] = opt
            widest = max(widest, encoding.colwidth(opt))
            end += 1

        for option in blocks[start:end]:
            option['optstrwidth'] = widest
        # blocks[end] (if any) is known not to be an option: skip it.
        start = end + 1
    return blocks
213 211
def prunecontainers(blocks, keep):
    """Prune unwanted containers.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    Every ".. container:: name" paragraph is removed. The blocks
    indented below it are removed too unless name is in keep; kept
    blocks are moved out to the container's indentation. Returns the
    (modified in place) blocks and the list of pruned container names.
    """
    pruned = []
    pos = 0
    while pos + 1 < len(blocks):
        # We are looking for this shape:
        #
        #    +-------+---------------------------+
        #    | ".. container ::" type            |
        #    +---+                               |
        #        | blocks                        |
        #        +-------------------------------+
        head = blocks[pos]
        iscontainer = (head['type'] == 'paragraph' and
                       head['lines'][0].startswith('.. container::'))
        if not iscontainer:
            pos += 1
            continue

        base = head['indent']
        shift = blocks[pos + 1]['indent'] - base
        name = head['lines'][0][15:]
        drop = name not in keep
        if drop:
            pruned.append(name)

        # The directive block itself always goes away.
        del blocks[pos]
        child = pos
        while child < len(blocks) and blocks[child]['indent'] > base:
            if drop:
                del blocks[child]
            else:
                blocks[child]['indent'] -= shift
                child += 1
        # pos is left alone: whatever now sits there is examined on
        # the next iteration.
    return blocks, pruned
251 249
252 250
# A section underline: a whole line made of one of the listed
# punctuation characters repeated at least twice.
_sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")
254 252
def findtables(blocks):
    '''Find simple tables

    Only simple one-line table elements are supported. A matching
    paragraph is retyped as 'table' and gains a 'header' flag and a
    'table' entry holding the rows as lists of stripped cell strings.
    '''

    for block in blocks:
        # We are looking for a block shaped like this:
        #
        # === ==== ===
        #  A    B   C
        # === ==== ===  <- optional
        #  1    2   3
        #  x    y   z
        # === ==== ===
        lines = block['lines']
        if not (block['type'] == 'paragraph' and
                len(lines) > 4 and
                _tablere.match(lines[0]) and
                lines[0] == lines[-1]):
            continue

        block['type'] = 'table'
        block['header'] = False
        div = lines[0]
        # A column starts wherever a run of '=' begins.
        columns = [x for x, ch in enumerate(div)
                   if ch == '=' and (x == 0 or div[x - 1] == ' ')]
        # Each column ends where the next one starts; the last one
        # runs to the end of the line.
        bounds = list(zip(columns, columns[1:] + [None]))

        rows = []
        for line in lines[1:-1]:
            if line == div:
                # An inner rule marks the rows above it as the header.
                block['header'] = True
                continue
            rows.append([line[first:last].strip() for first, last in bounds])
        block['table'] = rows

    return blocks
294 292
def findsections(blocks):
    """Finds sections.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A two-line paragraph whose second line is an underline as wide as
    the title is retyped as 'section'; the underline character is
    stored under 'underline' and the underline line is removed.
    """
    for block in blocks:
        # We are looking for a block shaped like this:
        #
        # +------------------------------+
        # | Section title                |
        # | -------------                |
        # +------------------------------+
        if block['type'] != 'paragraph':
            continue
        lines = block['lines']
        if len(lines) != 2:
            continue
        title, rule = lines
        if (encoding.colwidth(title) == len(rule) and
            _sectionre.match(rule)):
            block['underline'] = rule[0]
            block['type'] = 'section'
            del lines[1]
    return blocks
316 314
317 315
def inlineliterals(blocks):
    """Turn ``inline literal`` markers into double quotes.

    Only 'paragraph' and 'section' blocks are touched; their 'lines'
    entry is replaced by a new list.
    """
    for block in blocks:
        if block['type'] not in ('paragraph', 'section'):
            continue
        block['lines'] = [line.replace('``', '"') for line in block['lines']]
    return blocks
324 322
325 323
def hgrole(blocks):
    """Turn :hg:`command` into "hg command".

    Only 'paragraph' and 'section' blocks are touched; their 'lines'
    entry is replaced by a new list.
    """
    for block in blocks:
        if block['type'] not in ('paragraph', 'section'):
            continue
        # The opening marker is rewritten first, then every remaining
        # back-quote becomes a closing quote. This also works when the
        # command is broken across lines, and relies on the input
        # having no stray back-quotes (run the blocks through
        # inlineliterals first).
        block['lines'] = [line.replace(':hg:`', '"hg ').replace('`', '"')
                          for line in block['lines']]
    return blocks
336 334
337 335
def addmargins(blocks):
    """Adds empty blocks for vertical spacing.

    Consecutive bullet, option or field blocks of the same type stay
    together with no vertical space between them; an empty 'margin'
    block is inserted between every other pair of neighbours. The
    list is modified in place and returned.
    """
    grouped = ('bullet', 'option', 'field')
    # Walk backwards so that an insertion never shifts the pairs that
    # are still to be examined.
    for i in range(len(blocks) - 1, 0, -1):
        kind = blocks[i]['type']
        if kind == blocks[i - 1]['type'] and kind in grouped:
            continue
        blocks.insert(i, dict(lines=[''], indent=0, type='margin'))
    return blocks
353 351
def prunecomments(blocks):
    """Remove comments.

    A comment is a paragraph that starts with ".. " or consists of a
    lone "..". A margin block directly following a comment is removed
    with it. The list is modified in place and returned.
    """
    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        iscomment = (block['type'] == 'paragraph' and
                     (block['lines'][0].startswith('.. ') or
                      block['lines'] == ['..']))
        if not iscomment:
            pos += 1
            continue
        del blocks[pos]
        if pos < len(blocks) and blocks[pos]['type'] == 'margin':
            del blocks[pos]
    return blocks
367 365
# An admonition directive at the start of a line, in any letter case.
_admonitionre = re.compile(r"\.\. (admonition|attention|caution|danger|"
                           r"error|hint|important|note|tip|warning)::",
                           flags=re.IGNORECASE)

def findadmonitions(blocks):
    """
    Makes the type of the block an admonition block if
    the first line is an admonition directive

    The lower-cased directive name is stored under 'admonitiontitle'
    and the directive line is removed; any text that followed the
    directive on that line is kept as a new first line.
    """
    for block in blocks:
        lines = block['lines']
        m = _admonitionre.match(lines[0])
        if not m:
            continue
        block['type'] = 'admonition'
        title = m.group(1).lower()

        # Text after "::" and the character following it.
        firstline = lines[0][m.end() + 1:]
        if firstline:
            # NOTE(review): this listing looks whitespace-collapsed;
            # confirm the width of this indent against the repository.
            lines.insert(1, ' ' + firstline)

        block['admonitiontitle'] = title
        del lines[0]
    return blocks
392 390
# Translated titles printed above an admonition, keyed by the
# lower-cased directive name that findadmonitions stores on the block.
# NOTE(review): _admonitionre also accepts "admonition", which has no
# entry here, so formatblock's lookup would raise KeyError for it --
# confirm whether that directive is expected to be supported.
_admonitiontitles = {'attention': _('Attention:'),
                     'caution': _('Caution:'),
                     'danger': _('!Danger!') ,
                     'error': _('Error:'),
                     'hint': _('Hint:'),
                     'important': _('Important:'),
                     'note': _('Note:'),
                     'tip': _('Tip:'),
                     'warning': _('Warning!')}
402 400
def formatoption(block, width):
    """Format an 'option' block according to width.

    The option string is padded to the block's 'optstrwidth' so the
    descriptions of one option list line up, and the description is
    wrapped next to it with a matching hanging indent.
    """
    # NOTE(review): this listing looks whitespace-collapsed; confirm
    # the spaces inside the two format strings against the repository.
    desc = ' '.join(map(str.strip, block['lines']))
    optstr = block['optstr']
    padding = ' ' * (block['optstrwidth'] - encoding.colwidth(optstr))
    initindent = '%s%s ' % (optstr, padding)
    hangindent = ' ' * (encoding.colwidth(initindent) + 1)
    wrapped = util.wrap(desc, width - 1,
                        initindent=initindent,
                        hangindent=hangindent)
    return ' %s' % wrapped
413 411
def formatblock(block, width):
    """Format a block according to width.

    Returns the rendered text of a single block; how it is rendered
    depends on the block's 'type'. A width of zero or less falls back
    to 78 columns.
    """
    if width <= 0:
        width = 78
    kind = block['type']
    indent = ' ' * block['indent']

    def leading(line):
        # number of leading whitespace characters of line
        return len(line) - len(line.lstrip())

    if kind == 'margin':
        return ''

    if kind == 'option':
        return formatoption(block, width)

    if kind == 'admonition':
        title = _admonitiontitles[block['admonitiontitle']]
        # The body is indented like the last line of the admonition.
        bodyindent = indent + leading(block['lines'][-1]) * ' '
        body = ' '.join(map(str.strip, block['lines']))
        return '%s\n%s' % (indent + title,
                           util.wrap(body, width=width,
                                     initindent=bodyindent,
                                     hangindent=bodyindent))

    if kind == 'literal':
        # NOTE(review): this listing looks whitespace-collapsed;
        # confirm the width of this extra indent against the repository.
        indent += ' '
        return indent + ('\n' + indent).join(block['lines'])

    if kind == 'section':
        title = block['lines'][0]
        underline = encoding.colwidth(title) * block['underline']
        return "%s%s\n%s%s" % (indent, title, indent, underline)

    if kind == 'table':
        rows = block['table']
        # compute column widths
        widths = [max([encoding.colwidth(e) for e in c]) for c in zip(*rows)]
        span = sum(widths) + len(widths) - 1
        # Wrapped text continues under the last column.
        hang = ' ' * (len(indent) + span - widths[-1])
        rowfmt = ' '.join('%%-%ds' % n for n in widths)

        text = ''
        for row in rows:
            line = util.wrap(rowfmt % tuple(row), width=width,
                             initindent=indent, hangindent=hang)
            if not text and block['header']:
                # The first row is the header: rule it off.
                text = line + '\n' + indent + '-' * (min(width, span)) + '\n'
            else:
                text += line + "\n"
        return text

    if kind == 'definition':
        term = indent + block['lines'][0]
        # The definition is indented like its last line.
        defindent = indent + leading(block['lines'][-1]) * ' '
        text = ' '.join(map(str.strip, block['lines'][1:]))
        return '%s\n%s' % (term, util.wrap(text, width=width,
                                           initindent=defindent,
                                           hangindent=defindent))

    subindent = indent
    if kind == 'bullet':
        first = block['lines'][0]
        if first.startswith('| '):
            # Line blocks lose their marker and get no extra
            # indentation.
            block['lines'][0] = first[2:]
        else:
            # Continuation lines hang under the text after the bullet.
            subindent = indent + _bulletre.match(first).end() * ' '
    elif kind == 'field':
        keywidth = block['keywidth']
        key = block['key']

        subindent = indent + _fieldwidth * ' '
        if len(key) + 2 > _fieldwidth:
            # key too large, use full line width
            key = key.ljust(width)
        elif keywidth + 2 < _fieldwidth:
            # all keys are small, add only two spaces
            key = key.ljust(keywidth + 2)
            subindent = indent + (keywidth + 2) * ' '
        else:
            # mixed sizes, use fieldwidth for this one
            key = key.ljust(_fieldwidth)
        block['lines'][0] = key + block['lines'][0]

    text = ' '.join(map(str.strip, block['lines']))
    return util.wrap(text, width=width,
                     initindent=indent,
                     hangindent=subindent)
494 492
def parse(text, indent=0, keep=None):
    """Parse text into a list of blocks

    indent is added to the indentation of every block found in text.
    keep lists the container types whose content is retained. Returns
    the blocks and the list of container types that were pruned.
    """
    blocks = findblocks(text)
    for b in blocks:
        b['indent'] += indent
    blocks = findliteralblocks(blocks)
    blocks = findtables(blocks)
    blocks, pruned = prunecontainers(blocks, keep or [])
    # The remaining passes each take and return the block list; their
    # order matters.
    passes = (findsections,
              inlineliterals,
              hgrole,
              splitparagraphs,
              updatefieldlists,
              updateoptionlists,
              addmargins,
              prunecomments,
              findadmonitions)
    for step in passes:
        blocks = step(blocks)
    return blocks, pruned
514 512
def formatblocks(blocks, width):
    """Format every block according to width, one rendering per line group.

    The renderings are joined with a single newline.
    """
    rendered = [formatblock(b, width) for b in blocks]
    return '\n'.join(rendered)
518 516
def format(text, width, indent=0, keep=None):
    """Parse and format the text according to width.

    Returns the formatted text. When keep is given (anything but
    None), a (text, pruned) pair is returned instead, where pruned
    lists the container types that were dropped.
    """
    blocks, pruned = parse(text, indent, keep or [])
    text = formatblocks(blocks, width)
    if keep is not None:
        return text, pruned
    return text
527 525
def getsections(blocks):
    '''return a list of (section name, nesting level, blocks) tuples

    The nesting level of a section is derived from the order in which
    underline characters are first seen: the first character is level
    1, a new character below it is one level deeper. Blocks that come
    before any section are collected under an unnamed level 0 entry.
    '''
    nest = ""
    secs = []
    for b in blocks:
        if b['type'] != 'section':
            if not secs:
                # add an initial empty section
                secs = [('', 0, [])]
            secs[-1][2].append(b)
            continue

        mark = b['underline']
        if mark not in nest:
            nest += mark
        level = nest.index(mark) + 1
        # Forget the underline characters of deeper levels.
        nest = nest[:level]
        secs.append((b['lines'][0], level, [b]))
    return secs
547 545
def decorateblocks(blocks, width):
    '''generate (section name, line text) pairs for search

    Returns one list per section; each list holds a
    (section name, line) pair for every line of that section's
    formatted text, with line endings kept.
    '''
    lines = []
    for name, _level, secblocks in getsections(blocks):
        text = formatblocks(secblocks, width)
        lines.append([(name, l) for l in text.splitlines(True)])
    return lines
556 554
def maketable(data, indent=0, header=False):
    '''Generate an RST table for the given table data

    data is a sequence of rows, each a sequence of cell strings.
    Every output line is prefixed with indent spaces. With header set
    and more than one row, a rule is also placed after the first row.
    '''

    widths = [max(encoding.colwidth(e) for e in c) for c in zip(*data)]
    pad = ' ' * indent
    rowfmt = pad + ' '.join('%%-%ds' % w for w in widths) + '\n'
    div = pad + ' '.join('=' * w for w in widths) + '\n'

    out = [div]
    out.extend(rowfmt % tuple(row) for row in data)
    if header and len(data) > 1:
        # out[0] is the top rule and out[1] the header row.
        out.insert(2, div)
    out.append(div)
    return ''.join(out)
General Comments 0
You need to be logged in to leave comments. Login now