##// END OF EJS Templates
minirst: drop debug code...
Matt Mackall -
r15038:3f4d337c default
parent child Browse files
Show More
@@ -1,579 +1,555
1 1 # minirst.py - minimal reStructuredText parser
2 2 #
3 3 # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """simplified reStructuredText parser.
9 9
10 10 This parser knows just enough about reStructuredText to parse the
11 11 Mercurial docstrings.
12 12
13 13 It cheats in a major way: nested blocks are not really nested. They
14 14 are just indented blocks that look like they are nested. This relies
15 15 on the user to keep the right indentation for the blocks.
16 16
17 17 Remember to update http://mercurial.selenic.com/wiki/HelpStyleGuide
18 18 when adding support for new constructs.
19 19 """
20 20
21 import re, sys
21 import re
22 22 import util, encoding
23 23 from i18n import _
24 24
25 25
def replace(text, substs):
    """Apply a sequence of substitutions to an encoded string.

    text is decoded using encoding.encoding, every (old, new) pair in
    substs is applied in order with unicode.replace, and the result is
    encoded back with the same encoding before being returned.
    """
    decoded = text.decode(encoding.encoding)
    for old, new in substs:
        decoded = decoded.replace(old, new)
    return decoded.encode(encoding.encoding)
31 31
32 32
# A block separator: a newline followed by one or more lines that are
# empty or contain only whitespace.
_blockre = re.compile(r"\n(?:\s*\n)+")

def findblocks(text):
    """Find continuous blocks of lines in text.

    Leading newlines and trailing whitespace are stripped from text
    before it is split on blank lines.

    Returns a list of dictionaries representing the blocks. Each block
    has an 'indent' field (the smallest leading-whitespace width among
    its lines) and a 'lines' field (the lines with that common
    indentation removed). Empty or whitespace-only text yields an
    empty list.
    """
    blocks = []
    for chunk in _blockre.split(text.lstrip('\n').rstrip()):
        lines = chunk.splitlines()
        if not lines:
            # Splitting an empty string yields [''], which has no
            # lines; min() over an empty sequence would raise
            # ValueError, so there is simply no block to record.
            continue
        indent = min([len(l) - len(l.lstrip()) for l in lines])
        blocks.append(dict(indent=indent,
                           lines=[l[indent:] for l in lines]))
    return blocks
48 48
49 49
def findliteralblocks(blocks):
    """Find literal blocks and add a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'. The list is modified in place (a block
    consisting only of '::' is removed) and returned.
    """
    pos = 0
    while pos < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        current = blocks[pos]
        current['type'] = 'paragraph'
        lastline = current['lines'][-1]
        if lastline.endswith('::') and pos + 1 < len(blocks):
            indent = current['indent']
            adjustment = blocks[pos + 1]['indent'] - indent

            if current['lines'] == ['::']:
                # Expanded form: remove the block.
                del blocks[pos]
                pos -= 1
            elif lastline.endswith(' ::'):
                # Partially minimized form: remove the space and both
                # colons.
                current['lines'][-1] = lastline[:-3]
            else:
                # Fully minimized form: remove just one colon.
                current['lines'][-1] = lastline[:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            #
            # NOTE: after the expanded form was removed, pos refers to
            # the block before it; if the '::' block was the first
            # block, pos is -1 and this looks at the last block.
            bullet = _bulletre.match(blocks[pos]['lines'][0])
            if bullet:
                indent += bullet.end()
                adjustment -= bullet.end()

            # Mark the following, more deeply indented blocks.
            nxt = pos + 1
            while nxt < len(blocks) and blocks[nxt]['indent'] > indent:
                blocks[nxt]['type'] = 'literal'
                blocks[nxt]['indent'] -= adjustment
                nxt += 1
            # Resume after the last block that was marked literal.
            pos = nxt - 1
        pos += 1
    return blocks
100 100
101 101 _bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')
102 102 _optionre = re.compile(r'^(-([a-zA-Z0-9]), )?(--[a-z0-9-]+)'
103 103 r'((.*) +)(.*)$')
104 104 _fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')
105 105 _definitionre = re.compile(r'[^ ]')
106 106 _tablere = re.compile(r'(=+\s+)*=+')
107 107
def splitparagraphs(blocks):
    """Split paragraphs into lists.

    Every 'paragraph' block whose first line starts a list item is
    replaced, in place, by one block per item. The new blocks carry
    the list type ('bullet', 'option', 'field' or 'definition') and
    the indent of the paragraph they came from.
    """
    # Tuples with (list type, item regexp, single line items?). Order
    # matters: definition lists have the least specific regexp and
    # must come last.
    listtypes = [('bullet', _bulletre, True),
                 ('option', _optionre, True),
                 ('field', _fieldre, True),
                 ('definition', _definitionre, False)]

    def startsitem(lines, pos, itemre, singleline):
        """Does itemre match an item at line pos?

        A list item can be followed by an indented line or another
        list item (but only if singleline is True).
        """
        if not itemre.match(lines[pos]):
            return False
        following = ''
        if pos + 1 < len(lines):
            following = lines[pos + 1]
        if not singleline:
            return following.startswith(' ')
        return (following == '' or following[0] == ' '
                or itemre.match(following))

    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        if block['type'] == 'paragraph':
            lines = block['lines']
            for listtype, itemre, singleline in listtypes:
                if not startsitem(lines, 0, itemre, singleline):
                    continue
                items = []
                for n, line in enumerate(lines):
                    if startsitem(lines, n, itemre, singleline):
                        # This line opens a new item; later lines are
                        # appended to it until the next item starts.
                        items.append(dict(type=listtype, lines=[],
                                          indent=block['indent']))
                    items[-1]['lines'].append(line)
                blocks[pos:pos + 1] = items
                break
        pos += 1
    return blocks
149 149
150 150
# Column width used by formatblock when laying out field list keys.
_fieldwidth = 12

def updatefieldlists(blocks):
    """Find key and maximum key width for field lists.

    For each run of consecutive 'field' blocks, the key is split off
    the first line and stored under 'key', and every block of the run
    gets the length of the longest key of that run as 'keywidth'.
    """
    start = 0
    while start < len(blocks):
        if blocks[start]['type'] != 'field':
            start += 1
            continue

        keywidth = 0
        end = start
        while end < len(blocks) and blocks[end]['type'] == 'field':
            block = blocks[end]
            key, rest = _fieldre.match(block['lines'][0]).groups()
            block['lines'][0] = rest
            block['key'] = key
            keywidth = max(keywidth, len(key))
            end += 1

        for block in blocks[start:end]:
            block['keywidth'] = keywidth
        # blocks[end], if any, is known not to be a field: skip it.
        start = end + 1

    return blocks
176 176
177 177
def updateoptionlists(blocks):
    """Find option string and maximum option string width for option lists.

    For each run of consecutive 'option' blocks, the first line is
    reduced to the description, the normalized option string is
    stored under 'optstr', and every block of the run gets the column
    width of the widest option string of that run as 'optstrwidth'.
    """
    start = 0
    while start < len(blocks):
        if blocks[start]['type'] != 'option':
            start += 1
            continue

        optstrwidth = 0
        end = start
        while end < len(blocks) and blocks[end]['type'] == 'option':
            block = blocks[end]
            m = _optionre.match(block['lines'][0])

            shortoption = m.group(2)
            longoption = m.group(3)[2:].strip()
            desc = m.group(6).strip()
            longoptionarg = m.group(5).strip()
            block['lines'][0] = desc

            if shortoption:
                prefix = "-%s " % shortoption
                noshortop = ''
            else:
                # Without a short option, padding is emitted in front
                # of the long option instead.
                prefix = ''
                noshortop = ' '

            opt = prefix + "%s--%s %s" % (noshortop, longoption,
                                          longoptionarg)
            opt = opt.rstrip()
            block['optstr'] = opt
            optstrwidth = max(optstrwidth, encoding.colwidth(opt))
            end += 1

        for block in blocks[start:end]:
            block['optstrwidth'] = optstrwidth
        # blocks[end], if any, is known not to be an option: skip it.
        start = end + 1
    return blocks
213 213
def prunecontainers(blocks, keep):
    """Prune unwanted containers.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    Every ".. container:: type" directive block is removed. If type is
    listed in keep, the more deeply indented blocks that follow are
    kept and dedented to the directive's level; otherwise they are
    removed as well.

    Returns a (blocks, pruned) tuple where blocks is the same list,
    modified in place, and pruned lists the types of the containers
    whose contents were removed.
    """
    pruned = []
    i = 0
    while i + 1 < len(blocks):
        # Searching for a block that looks like this:
        #
        # +-------+---------------------------+
        # | ".. container ::" type            |
        # +---+                               |
        #     | blocks                        |
        #     +-------------------------------+
        block = blocks[i]
        if (block['type'] == 'paragraph' and
            block['lines'][0].startswith('.. container::')):
            indent = block['indent']
            adjustment = blocks[i + 1]['indent'] - indent
            # Skip the 15 characters of ".. container:: ".
            containertype = block['lines'][0][15:]
            prune = containertype not in keep
            if prune:
                pruned.append(containertype)

            # Always delete the ".. container:: type" block itself.
            del blocks[i]
            j = i
            while j < len(blocks) and blocks[j]['indent'] > indent:
                if prune:
                    del blocks[j]
                else:
                    blocks[j]['indent'] -= adjustment
                    j += 1
            # Position i now holds the first kept child or the block
            # following the container; examine it on the next pass
            # instead of skipping it. The index must not be moved
            # back once per pruned child: that could make it negative
            # and index the list from its end.
            continue
        i += 1
    return blocks, pruned
251 251
252 252
253 253 _sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")
254 254
def findtables(blocks):
    """Find simple tables.

    Only simple one-line table elements are supported.

    A 'paragraph' block of more than four lines whose first and last
    lines are the same divider line becomes a 'table' block. Its rows
    (lists of stripped cell strings) are stored under 'table', and
    'header' is set to True if the divider also occurs inside the
    block.
    """
    for block in blocks:
        # Searching for a block that looks like this:
        #
        # === ==== ===
        #  A    B   C
        # === ==== ===  <- optional
        #  1    2   3
        #  x    y   z
        # === ==== ===
        if not (block['type'] == 'paragraph' and
                len(block['lines']) > 4 and
                _tablere.match(block['lines'][0]) and
                block['lines'][0] == block['lines'][-1]):
            continue

        block['type'] = 'table'
        block['header'] = False
        div = block['lines'][0]
        # A column starts at every "=" that opens a run of "=".
        columns = [x for x, char in enumerate(div)
                   if char == '=' and (x == 0 or div[x - 1] == ' ')]
        # Each column ends where the next one starts; the last one
        # runs to the end of the line (a slice bound of None).
        ends = columns[1:] + [None]
        rows = []
        for l in block['lines'][1:-1]:
            if l == div:
                block['header'] = True
                continue
            rows.append([l[start:end].strip()
                         for start, end in zip(columns, ends)])
        block['table'] = rows

    return blocks
294 294
def findsections(blocks):
    """Find sections.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A two-line 'paragraph' block whose second line is an underline as
    wide as the first line becomes a 'section' block: the underline
    character is stored under 'underline' and the underline line is
    dropped.
    """
    for block in blocks:
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | Section title                |
        # | -------------                |
        # +------------------------------+
        if block['type'] != 'paragraph' or len(block['lines']) != 2:
            continue
        title, underline = block['lines']
        if (encoding.colwidth(title) == len(underline) and
            _sectionre.match(underline)):
            block['underline'] = underline[0]
            block['type'] = 'section'
            del block['lines'][1]
    return blocks
316 316
317 317
def inlineliterals(blocks):
    """Replace inline literal markers (``) with double quotes.

    Only 'paragraph' and 'section' blocks are touched.
    """
    substs = [('``', '"')]
    for block in blocks:
        if block['type'] not in ('paragraph', 'section'):
            continue
        block['lines'] = [replace(line, substs) for line in block['lines']]
    return blocks
324 324
325 325
def hgrole(blocks):
    """Render the :hg: role in 'paragraph' and 'section' blocks."""
    substs = [(':hg:`', '"hg '), ('`', '"')]
    for block in blocks:
        if block['type'] not in ('paragraph', 'section'):
            continue
        # Turn :hg:`command` into "hg command". This also works
        # when there is a line break in the command and relies on
        # the fact that we have no stray back-quotes in the input
        # (run the blocks through inlineliterals first).
        block['lines'] = [replace(line, substs) for line in block['lines']]
    return blocks
336 336
337 337
def addmargins(blocks):
    """Add empty blocks for vertical spacing.

    Consecutive blocks of the same type are kept together without
    vertical space when that type is 'bullet', 'option' or 'field';
    a 'margin' block is inserted between all other adjacent blocks.
    The list is modified in place and returned.
    """
    grouped = ('bullet', 'option', 'field')
    spaced = []
    previous = None
    for block in blocks:
        if previous is not None:
            kind = block['type']
            if not (kind == previous['type'] and kind in grouped):
                spaced.append(dict(lines=[''], indent=0, type='margin'))
        spaced.append(block)
        previous = block
    blocks[:] = spaced
    return blocks
353 353
def prunecomments(blocks):
    """Remove comments.

    A comment is a 'paragraph' block whose first line starts with
    '.. ' or that consists of the single line '..'. A 'margin' block
    directly following a comment is removed along with it. The list
    is modified in place and returned.
    """
    kept = []
    aftercomment = False
    for block in blocks:
        if aftercomment:
            aftercomment = False
            if block['type'] == 'margin':
                # Margin that belonged to the removed comment.
                continue
        if block['type'] == 'paragraph' and (
            block['lines'][0].startswith('.. ') or
            block['lines'] == ['..']):
            aftercomment = True
            continue
        kept.append(block)
    blocks[:] = kept
    return blocks
367 367
# An admonition directive such as ".. note::", matched regardless of
# case.
_admonitionre = re.compile(r"\.\. (admonition|attention|caution|danger|"
                           r"error|hint|important|note|tip|warning)::",
                           flags=re.IGNORECASE)

def findadmonitions(blocks):
    """
    Make the type of the block an admonition block if
    the first line is an admonition directive.

    The lower-cased directive name is stored under 'admonitiontitle'.
    The directive line is removed; any text following the directive
    on that line is kept as the first line of the block.
    """
    for block in blocks:
        lines = block['lines']
        directive = lines[0]
        m = _admonitionre.match(directive)
        if not m:
            continue
        block['type'] = 'admonition'
        # Strip the leading ".. " and the trailing "::".
        block['admonitiontitle'] = directive[3:m.end() - 2].lower()

        firstline = directive[m.end() + 1:]
        if firstline:
            lines[0] = ' ' + firstline
        else:
            del lines[0]
    return blocks
392 392
# Translated titles printed above admonition blocks, keyed by the
# lower-cased directive name.
# NOTE(review): the admonition regexp also accepts a plain
# ".. admonition::" directive, which has no entry here and is looked
# up directly by formatblock -- confirm whether that is intended.
_admonitiontitles = {
    'attention': _('Attention:'),
    'caution': _('Caution:'),
    'danger': _('!Danger!'),
    'error': _('Error:'),
    'hint': _('Hint:'),
    'important': _('Important:'),
    'note': _('Note:'),
    'tip': _('Tip:'),
    'warning': _('Warning!'),
}
402 402
def formatoption(block, width):
    """Format an 'option' block according to width.

    The option string is padded to the 'optstrwidth' of its list so
    that the descriptions line up, and the description lines are
    joined and wrapped with util.wrap using a hanging indent.
    """
    optstr = block['optstr']
    desc = ' '.join(map(str.strip, block['lines']))
    padding = ' ' * (block['optstrwidth'] - encoding.colwidth(optstr))
    initindent = '%s%s ' % (optstr, padding)
    hangindent = ' ' * (encoding.colwidth(initindent) + 1)
    wrapped = util.wrap(desc, width - 1,
                        initindent=initindent,
                        hangindent=hangindent)
    return ' %s' % wrapped
413 413
def formatblock(block, width):
    """Format a block according to width.

    A width of zero or less selects a width of 78. The block's 'type'
    decides the layout. Returns the formatted text.

    Note that 'bullet' line blocks and 'field' blocks have their first
    line rewritten in place while being formatted.
    """
    if width <= 0:
        width = 78
    indent = ' ' * block['indent']
    kind = block['type']

    if kind == 'admonition':
        title = _admonitiontitles[block['admonitiontitle']]
        lastline = block['lines'][-1]
        # The body is indented like the last line of the block.
        hang = len(lastline) - len(lastline.lstrip())
        defindent = indent + hang * ' '
        text = ' '.join(map(str.strip, block['lines']))
        return '%s\n%s' % (indent + title, util.wrap(text, width=width,
                                                     initindent=defindent,
                                                     hangindent=defindent))

    if kind == 'margin':
        return ''

    if kind == 'literal':
        # Literal blocks are emitted verbatim with extra indentation.
        indent += ' '
        return indent + ('\n' + indent).join(block['lines'])

    if kind == 'section':
        title = block['lines'][0]
        underline = encoding.colwidth(title) * block['underline']
        return "%s%s\n%s%s" % (indent, title, indent, underline)

    if kind == 'table':
        table = block['table']
        # compute column widths
        widths = [max([encoding.colwidth(e) for e in c])
                  for c in zip(*table)]
        span = sum(widths) + len(widths) - 1
        indent = ' ' * block['indent']
        # Wrapped text continues underneath the last column.
        hang = ' ' * (len(indent) + span - widths[-1])
        rowformat = ' '.join('%%-%ds' % n for n in widths)

        text = ''
        for row in table:
            l = util.wrap(rowformat % tuple(row), width=width,
                          initindent=indent, hangindent=hang)
            if not text and block['header']:
                # First row of a table with a header: rule it off.
                text = l + '\n' + indent + '-' * (min(width, span)) + '\n'
            else:
                text += l + "\n"
        return text

    if kind == 'definition':
        term = indent + block['lines'][0]
        lastline = block['lines'][-1]
        hang = len(lastline) - len(lastline.lstrip())
        defindent = indent + hang * ' '
        text = ' '.join(map(str.strip, block['lines'][1:]))
        return '%s\n%s' % (term, util.wrap(text, width=width,
                                           initindent=defindent,
                                           hangindent=defindent))

    if kind == 'option':
        return formatoption(block, width)

    subindent = indent
    if kind == 'bullet':
        firstline = block['lines'][0]
        if firstline.startswith('| '):
            # Remove bullet for line blocks and add no extra
            # indentation.
            block['lines'][0] = firstline[2:]
        else:
            # Continuation lines hang below the text after the bullet.
            subindent = indent + _bulletre.match(firstline).end() * ' '
    elif kind == 'field':
        keywidth = block['keywidth']
        key = block['key']

        subindent = indent + _fieldwidth * ' '
        if len(key) + 2 > _fieldwidth:
            # key too large, use full line width
            key = key.ljust(width)
        elif keywidth + 2 < _fieldwidth:
            # all keys are small, add only two spaces
            key = key.ljust(keywidth + 2)
            subindent = indent + (keywidth + 2) * ' '
        else:
            # mixed sizes, use fieldwidth for this one
            key = key.ljust(_fieldwidth)
        block['lines'][0] = key + block['lines'][0]

    text = ' '.join(map(str.strip, block['lines']))
    return util.wrap(text, width=width,
                     initindent=indent,
                     hangindent=subindent)
494 494
def parse(text, indent=0, keep=None):
    """Parse text into a list of blocks.

    indent is added to the indentation of every block found in text.
    keep lists the container types whose contents are retained (none
    if omitted).

    Returns a (blocks, pruned) tuple, where pruned holds the types of
    the containers that were pruned.
    """
    blocks = findblocks(text)
    for block in blocks:
        block['indent'] += indent
    blocks = findtables(findliteralblocks(blocks))
    blocks, pruned = prunecontainers(blocks, keep or [])
    # The remaining passes each take and return the block list; the
    # order matters (e.g. hgrole relies on inlineliterals having run).
    for step in (findsections, inlineliterals, hgrole, splitparagraphs,
                 updatefieldlists, updateoptionlists, addmargins,
                 prunecomments, findadmonitions):
        blocks = step(blocks)
    return blocks, pruned
514 514
def formatblocks(blocks, width):
    """Format every block according to width and join them with newlines."""
    return '\n'.join([formatblock(block, width) for block in blocks])
518 518
def format(text, width, indent=0, keep=None):
    """Parse and format the text according to width.

    Returns the formatted text if keep is None; otherwise returns a
    (text, pruned) tuple where pruned holds the types of the
    containers that were pruned.
    """
    blocks, pruned = parse(text, indent, keep or [])
    formatted = formatblocks(blocks, width)
    if keep is not None:
        return formatted, pruned
    return formatted
527 527
def getsections(blocks):
    """Return a list of (section name, nesting level, blocks) tuples.

    The nesting level of a section is derived from the order in which
    underline characters are first seen. Blocks that precede the first
    section are collected in an initial section named '' at level 0.
    """
    # Underline characters of the currently open sections, outermost
    # first.
    markers = ""
    sections = []
    for block in blocks:
        if block['type'] != 'section':
            if not sections:
                # add an initial empty section
                sections.append(('', 0, []))
            sections[-1][2].append(block)
            continue

        mark = block['underline']
        if mark not in markers:
            markers += mark
        level = markers.index(mark) + 1
        # Returning to an outer level closes the deeper ones.
        markers = markers[:level]
        sections.append((block['lines'][0], level, [block]))
    return sections
547 547
def decorateblocks(blocks, width):
    """Generate (section name, line text) pairs for search.

    Returns a list with one entry per section found by getsections;
    each entry is the list of (section name, line) pairs for the
    lines of that section formatted to width. Lines keep their line
    endings.
    """
    lines = []
    for name, level, sectionblocks in getsections(blocks):
        text = formatblocks(sectionblocks, width)
        lines.append([(name, l) for l in text.splitlines(True)])
    return lines
556
if __name__ == "__main__":
    # Debugging aid: read reStructuredText from stdin, dump the block
    # list after every parser pass, and print the result formatted to
    # a width of 30. Command line arguments name the containers to
    # keep.
    from pprint import pprint

    def debug(func, *args):
        """Run one parser pass and pretty-print what it returned."""
        result = func(*args)
        print("*** after %s:" % func.__name__)
        pprint(result)
        print("")
        return result

    text = sys.stdin.read()
    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    blocks, pruned = debug(prunecontainers, blocks, sys.argv[1:])
    for stage in (inlineliterals, splitparagraphs, updatefieldlists,
                  updateoptionlists, findsections, addmargins,
                  prunecomments, findadmonitions):
        blocks = debug(stage, blocks)
    print('\n'.join(formatblock(b, 30) for b in blocks))
General Comments 0
You need to be logged in to leave comments. Login now