##// END OF EJS Templates
minirst: add getsections helper
Matt Mackall -
r15014:a814e986 default
parent child Browse files
Show More
@@ -1,491 +1,510 b''
1 1 # minirst.py - minimal reStructuredText parser
2 2 #
3 3 # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """simplified reStructuredText parser.
9 9
10 10 This parser knows just enough about reStructuredText to parse the
11 11 Mercurial docstrings.
12 12
13 13 It cheats in a major way: nested blocks are not really nested. They
14 14 are just indented blocks that look like they are nested. This relies
15 15 on the user to keep the right indentation for the blocks.
16 16
17 17 Remember to update http://mercurial.selenic.com/wiki/HelpStyleGuide
18 18 when adding support for new constructs.
19 19 """
20 20
21 21 import re, sys
22 22 import util, encoding
23 23 from i18n import _
24 24
25 25
def replace(text, substs):
    """Apply a sequence of substitutions to an encoded string.

    text is decoded using encoding.encoding, every (old, new) pair in
    substs is applied in order on the decoded value, and the result is
    encoded again with the same encoding before being returned.
    """
    decoded = text.decode(encoding.encoding)
    for old, new in substs:
        decoded = decoded.replace(old, new)
    return decoded.encode(encoding.encoding)
31 31
32 32
# A block separator is a newline followed by one or more lines that
# contain nothing but whitespace.
_blockre = re.compile(r"\n(?:\s*\n)+")


def findblocks(text):
    """Find continuous blocks of lines in text.

    Returns a list of dictionaries representing the blocks. Each block
    has an 'indent' field (the smallest leading-whitespace width of its
    lines) and a 'lines' field (the lines with that common indentation
    removed).

    Empty or whitespace-only text yields an empty list.
    """
    blocks = []
    for b in _blockre.split(text.strip()):
        lines = b.splitlines()
        if not lines:
            # Splitting an empty string gives one empty chunk; taking
            # min() over its (nonexistent) lines would raise ValueError.
            continue
        indent = min([len(l) - len(l.lstrip()) for l in lines])
        lines = [l[indent:] for l in lines]
        blocks.append(dict(indent=indent, lines=lines))
    return blocks
48 48
49 49
def findliteralblocks(blocks):
    """Tag every block with a 'type' of 'paragraph' or 'literal'.

    A block whose last line ends with "::" introduces literal blocks:
    the blocks that follow it and are indented deeper than it are typed
    'literal'. Everything else is typed 'paragraph'. The list is
    modified in place and also returned.
    """
    pos = 0
    while pos < len(blocks):
        current = blocks[pos]
        current['type'] = 'paragraph'
        lastline = current['lines'][-1]
        if lastline.endswith('::') and pos + 1 < len(blocks):
            baseindent = current['indent']
            shift = blocks[pos + 1]['indent'] - baseindent

            if current['lines'] == ['::']:
                # Expanded form: the marker stands alone, so the whole
                # block is dropped and we step back to the previous one.
                del blocks[pos]
                pos -= 1
            elif lastline.endswith(' ::'):
                # Partially minimized form: strip the space and both
                # colons.
                current['lines'][-1] = lastline[:-3]
            else:
                # Fully minimized form: strip a single colon.
                current['lines'][-1] = lastline[:-1]

            # List items are rendered with a hanging indent. Account for
            # the bullet width now, while the original indentation of
            # the following literal blocks is still known.
            bullet = _bulletre.match(blocks[pos]['lines'][0])
            if bullet:
                baseindent += bullet.end()
                shift -= bullet.end()

            # Every following block indented deeper than the paragraph
            # is part of the literal section.
            while (pos + 1 < len(blocks)
                   and blocks[pos + 1]['indent'] > baseindent):
                pos += 1
                blocks[pos]['type'] = 'literal'
                blocks[pos]['indent'] -= shift
        pos += 1
    return blocks
100 100
# Patterns recognising the first line of each kind of list item.
# NOTE(review): this copy of the file appears to have had runs of spaces
# collapsed; confirm the spacing inside these patterns (notably the
# ' +' in _optionre) against the canonical source.
_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')
_optionre = re.compile(r'^(-([a-zA-Z0-9]), )?(--[a-z0-9-]+)'
                       r'((.*) +)(.*)$')
_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')
_definitionre = re.compile(r'[^ ]')


def splitparagraphs(blocks):
    """Split paragraph blocks that are really lists into list items.

    Each 'paragraph' block whose first line starts a bullet, option,
    field or definition item is replaced in place by one block per
    item, typed accordingly and keeping the paragraph's indent. Other
    blocks are left untouched. Returns blocks.
    """
    # (list type, item regexp, single line items?). Order matters: the
    # definition regexp is the least specific one and must be tried last.
    listtypes = [('bullet', _bulletre, True),
                 ('option', _optionre, True),
                 ('field', _fieldre, True),
                 ('definition', _definitionre, False)]

    def isitem(lines, pos, itemre, singleline):
        """Tell whether an item matching itemre starts at lines[pos].

        An item must be followed by an indented line; when singleline
        is true it may instead be the last line or be followed directly
        by another item.
        """
        if not itemre.match(lines[pos]):
            return False
        following = ''
        if pos + 1 < len(lines):
            following = lines[pos + 1]
        if not singleline:
            return following.startswith(' ')
        return (following == '' or following[0] == ' '
                or itemre.match(following))

    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        if block['type'] == 'paragraph':
            lines = block['lines']
            for kind, itemre, singleline in listtypes:
                if not isitem(lines, 0, itemre, singleline):
                    continue
                items = []
                for idx, line in enumerate(lines):
                    if isitem(lines, idx, itemre, singleline):
                        # A new item starts here; later lines are
                        # appended to it until the next item starts.
                        items.append({'type': kind, 'lines': [],
                                      'indent': block['indent']})
                    items[-1]['lines'].append(line)
                blocks[pos:pos + 1] = items
                break
        pos += 1
    return blocks
148 148
149 149
# Column width used for field keys when laying out field lists.
_fieldwidth = 12


def updatefieldlists(blocks):
    """Extract field keys and record the widest key of each field list.

    For every run of consecutive 'field' blocks, the key is split off
    the first line and stored under 'key', the first line is replaced
    by the remaining text, and each block of the run gets 'keywidth'
    set to the length of the longest key in that run. Returns blocks.
    """
    start = 0
    while start < len(blocks):
        if blocks[start]['type'] != 'field':
            start += 1
            continue

        widest = 0
        end = start
        while end < len(blocks) and blocks[end]['type'] == 'field':
            field = blocks[end]
            key, rest = _fieldre.match(field['lines'][0]).groups()
            field['lines'][0] = rest
            field['key'] = key
            widest = max(widest, len(key))
            end += 1

        for field in blocks[start:end]:
            field['keywidth'] = widest
        # blocks[end], if any, is known not to be a field: skip past it.
        start = end + 1

    return blocks
175 175
176 176
def updateoptionlists(blocks):
    """Build the option string of each option and record the widest one.

    For every run of consecutive 'option' blocks, the first line is
    parsed with _optionre, replaced by the stripped description, and
    the rebuilt option text is stored under 'optstr'. Each block of the
    run then gets 'optstrwidth' set to the largest column width (as
    reported by encoding.colwidth) among the run's option strings.
    Returns blocks.
    """
    start = 0
    while start < len(blocks):
        if blocks[start]['type'] != 'option':
            start += 1
            continue

        widest = 0
        end = start
        while end < len(blocks) and blocks[end]['type'] == 'option':
            option = blocks[end]
            m = _optionre.match(option['lines'][0])

            shortoption = m.group(2)
            longoption = m.group(3)[2:].strip()
            longoptionarg = m.group(5).strip()
            option['lines'][0] = m.group(6).strip()

            if shortoption:
                prefix = "-%s " % shortoption
                padding = ''
            else:
                # No short form: pad instead.
                # NOTE(review): this copy of the file appears to have
                # had runs of spaces collapsed; confirm the width of
                # this padding against the canonical source.
                prefix = ''
                padding = ' '

            optstr = prefix + "%s--%s %s" % (padding, longoption,
                                             longoptionarg)
            optstr = optstr.rstrip()
            option['optstr'] = optstr
            widest = max(widest, encoding.colwidth(optstr))
            end += 1

        for option in blocks[start:end]:
            option['optstrwidth'] = widest
        # blocks[end], if any, is known not to be an option: skip it.
        start = end + 1
    return blocks
212 212
def prunecontainers(blocks, keep):
    """Prune unwanted containers.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A container is a paragraph whose first line starts with
    ".. container::", followed by blocks indented deeper than that
    paragraph. The directive block itself is always removed. When the
    container type (the text after ".. container:: ") is not in keep,
    the contained blocks are removed as well; otherwise they are
    shifted left so that the first contained block lines up with the
    removed directive.

    Returns a (blocks, pruned) tuple where pruned lists the container
    types that were removed, in the order encountered. A directive in
    the very last block is left alone, since nothing follows it.
    """
    pruned = []
    i = 0
    while i + 1 < len(blocks):
        if (blocks[i]['type'] == 'paragraph' and
            blocks[i]['lines'][0].startswith('.. container::')):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent
            containertype = blocks[i]['lines'][0][15:]
            prune = containertype not in keep
            if prune:
                pruned.append(containertype)

            # Always delete the ".. container:: type" block itself.
            del blocks[i]
            j = i
            while j < len(blocks) and blocks[j]['indent'] > indent:
                if prune:
                    del blocks[j]
                else:
                    blocks[j]['indent'] -= adjustment
                    j += 1

            # Index i now holds whatever followed the directive: either
            # the first kept child (which may itself be a nested
            # container) or the next sibling. Look at it again instead
            # of advancing, so no block is skipped and the index never
            # moves backwards past the start of the list.
            continue
        i += 1
    return blocks, pruned
250 250
251 251
# A section underline: one punctuation character repeated at least twice.
_sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")


def findsections(blocks):
    """Turn underlined titles into 'section' blocks.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A two-line paragraph whose second line is an underline as long as
    the column width of the first line becomes a 'section' block: the
    underline character is stored under 'underline' and the underline
    line is removed. Returns blocks.
    """
    for block in blocks:
        if block['type'] != 'paragraph':
            continue
        lines = block['lines']
        if len(lines) != 2:
            continue
        title, rule = lines
        if (encoding.colwidth(title) == len(rule)
            and _sectionre.match(rule)):
            block['underline'] = rule[0]
            block['type'] = 'section'
            del lines[1]
    return blocks
275 275
276 276
def inlineliterals(blocks):
    """Replace every `` marker with a double quote.

    Only the lines of 'paragraph' and 'section' blocks are rewritten.
    Returns blocks.
    """
    substs = [('``', '"')]
    for block in blocks:
        if block['type'] not in ('paragraph', 'section'):
            continue
        block['lines'] = [replace(line, substs) for line in block['lines']]
    return blocks
283 283
284 284
def hgrole(blocks):
    """Rewrite :hg:`command` roles as "hg command".

    Only the lines of 'paragraph' and 'section' blocks are rewritten.
    Returns blocks.
    """
    # The opening marker becomes '"hg ' and any remaining back-quote
    # becomes the closing quote. This also works when the command is
    # broken across lines, but it relies on the input containing no
    # stray back-quotes (run the blocks through inlineliterals first).
    substs = [(':hg:`', '"hg '), ('`', '"')]
    for block in blocks:
        if block['type'] not in ('paragraph', 'section'):
            continue
        block['lines'] = [replace(line, substs) for line in block['lines']]
    return blocks
295 295
296 296
def addmargins(blocks):
    """Insert empty 'margin' blocks for vertical spacing.

    Consecutive blocks of the same type are kept together when that
    type is 'bullet', 'option' or 'field'; a margin block is inserted
    between every other pair of neighbouring blocks. The list is
    modified in place and returned.
    """
    grouped = ('bullet', 'option', 'field')
    pos = 1
    while pos < len(blocks):
        kind = blocks[pos]['type']
        if kind == blocks[pos - 1]['type'] and kind in grouped:
            pos += 1
            continue
        blocks.insert(pos, {'lines': [''], 'indent': 0, 'type': 'margin'})
        # Step over both the new margin and the block it precedes.
        pos += 2
    return blocks
312 312
def prunecomments(blocks):
    """Remove comment blocks.

    A comment is a 'paragraph' block whose first line starts with
    ".. " or whose only line is "..". A 'margin' block directly
    following a removed comment is removed too. The list is modified
    in place and returned.
    """
    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        iscomment = False
        if block['type'] == 'paragraph':
            lines = block['lines']
            iscomment = lines[0].startswith('.. ') or lines == ['..']
        if not iscomment:
            pos += 1
            continue
        del blocks[pos]
        # Drop the spacing that belonged to the comment as well.
        if pos < len(blocks) and blocks[pos]['type'] == 'margin':
            del blocks[pos]
    return blocks
326 326
# Matches an admonition directive at the start of a line, in any case.
_admonitionre = re.compile(r"\.\. (admonition|attention|caution|danger|"
                           r"error|hint|important|note|tip|warning)::",
                           flags=re.IGNORECASE)


def findadmonitions(blocks):
    """Mark blocks that begin with an admonition directive.

    A block whose first line starts with an admonition directive gets
    the type 'admonition' and an 'admonitiontitle' holding the
    lower-cased directive name. The directive line is removed; any text
    that followed the directive on that line is kept as the new first
    line, prefixed with a space. Returns blocks.
    """
    for block in blocks:
        lines = block['lines']
        directive = lines[0]
        m = _admonitionre.match(directive)
        if not m:
            continue

        block['type'] = 'admonition'
        # Strip the leading ".. " and the trailing "::".
        block['admonitiontitle'] = directive[3:m.end() - 2].lower()

        inline = directive[m.end() + 1:]
        del lines[0]
        if inline:
            # NOTE(review): this copy of the file appears to have had
            # runs of spaces collapsed; confirm the width of this
            # prefix against the canonical source.
            lines.insert(0, ' ' + inline)
    return blocks
351 351
# Translated heading printed above each kind of admonition, keyed by the
# lower-cased directive name.
_admonitiontitles = {
    'attention': _('Attention:'),
    'caution': _('Caution:'),
    'danger': _('!Danger!'),
    'error': _('Error:'),
    'hint': _('Hint:'),
    'important': _('Important:'),
    'note': _('Note:'),
    'tip': _('Tip:'),
    'warning': _('Warning!'),
}
361 361
def formatoption(block, width):
    """Format an 'option' block as its option string plus description.

    The option string is padded up to the block's 'optstrwidth' so the
    descriptions of one option list line up, and the description is
    wrapped with util.wrap to width - 1 columns.
    """
    # NOTE(review): this copy of the file appears to have had runs of
    # spaces collapsed; confirm the spacing inside the string literals
    # below against the canonical source.
    optstr = block['optstr']
    description = ' '.join(map(str.strip, block['lines']))
    padding = ' ' * (block['optstrwidth'] - encoding.colwidth(optstr))
    initindent = '%s%s ' % (optstr, padding)
    hangindent = ' ' * (encoding.colwidth(initindent) + 1)
    wrapped = util.wrap(description, width - 1,
                        initindent=initindent,
                        hangindent=hangindent)
    return ' %s' % wrapped
372 372
def formatblock(block, width):
    """Format a block according to width.

    A width of zero or less is replaced by 78. The rendering depends on
    the block's 'type'; text is wrapped with util.wrap. For 'bullet'
    and 'field' blocks the first entry of block['lines'] is rewritten
    in place before wrapping.
    """
    if width <= 0:
        width = 78
    indent = ' ' * block['indent']
    kind = block['type']

    if kind == 'margin':
        return ''
    if kind == 'option':
        return formatoption(block, width)

    lines = block['lines']

    if kind == 'admonition':
        title = _admonitiontitles[block['admonitiontitle']]
        # The body hangs at the indentation of the last line.
        lastline = lines[-1]
        hang = len(lastline) - len(lastline.lstrip())
        defindent = indent + hang * ' '
        body = util.wrap(' '.join(map(str.strip, lines)), width=width,
                         initindent=defindent,
                         hangindent=defindent)
        return '%s\n%s' % (indent + title, body)

    if kind == 'literal':
        # NOTE(review): this copy of the file appears to have had runs
        # of spaces collapsed; confirm the width of this extra indent
        # against the canonical source.
        indent += ' '
        return indent + ('\n' + indent).join(lines)

    if kind == 'section':
        title = lines[0]
        rule = encoding.colwidth(title) * block['underline']
        return "%s%s\n%s%s" % (indent, title, indent, rule)

    if kind == 'definition':
        term = indent + lines[0]
        # The definition hangs at the indentation of the last line.
        lastline = lines[-1]
        hang = len(lastline) - len(lastline.lstrip())
        defindent = indent + hang * ' '
        body = util.wrap(' '.join(map(str.strip, lines[1:])), width=width,
                         initindent=defindent,
                         hangindent=defindent)
        return '%s\n%s' % (term, body)

    subindent = indent
    if kind == 'bullet':
        first = lines[0]
        if first.startswith('| '):
            # Line block: drop the marker and add no extra indentation.
            lines[0] = first[2:]
        else:
            # Continuation lines hang below the text after the bullet.
            subindent = indent + _bulletre.match(first).end() * ' '
    elif kind == 'field':
        key = block['key']
        keywidth = block['keywidth']

        subindent = indent + _fieldwidth * ' '
        if len(key) + 2 > _fieldwidth:
            # key too large, use full line width
            padto = width
        elif keywidth + 2 < _fieldwidth:
            # all keys are small, add only two spaces
            padto = keywidth + 2
            subindent = indent + padto * ' '
        else:
            # mixed sizes, use fieldwidth for this one
            padto = _fieldwidth
        lines[0] = key.ljust(padto) + lines[0]

    return util.wrap(' '.join(map(str.strip, lines)), width=width,
                     initindent=indent,
                     hangindent=subindent)
435 435
def parse(text, indent=0, keep=None):
    """Parse text into a list of blocks.

    indent is added to the indentation of every block found in text.
    keep lists the container types to retain (none when omitted).
    Returns a (blocks, pruned) tuple, where pruned holds the container
    types reported as removed by prunecontainers.
    """
    blocks = findblocks(text)
    for block in blocks:
        block['indent'] += indent
    blocks = findliteralblocks(blocks)
    blocks, pruned = prunecontainers(blocks, keep or [])

    # The remaining passes each take the block list and return it.
    passes = (findsections, inlineliterals, hgrole, splitparagraphs,
              updatefieldlists, updateoptionlists, addmargins,
              prunecomments, findadmonitions)
    for step in passes:
        blocks = step(blocks)
    return blocks, pruned
454 454
def formatblocks(blocks, width):
    """Format every block for width and join the results with newlines."""
    return '\n'.join([formatblock(block, width) for block in blocks])
458 458
def format(text, width, indent=0, keep=None):
    """Parse and format the text according to width.

    Returns the formatted text. When keep is given (anything but None),
    returns a (text, pruned) tuple instead, where pruned is the list of
    pruned container types reported by parse.
    """
    blocks, pruned = parse(text, indent, keep or [])
    rendered = formatblocks(blocks, width)
    if keep is not None:
        return rendered, pruned
    return rendered
467 467
def getsections(blocks):
    '''return a list of (section name, nesting level, blocks) tuples

    Each 'section' block opens a new entry whose block list starts with
    the section block itself; later non-section blocks are appended to
    the most recent entry. Blocks that precede the first section are
    collected under an entry named '' at level 0. The nesting level of
    a section is given by the order in which underline characters were
    first seen, starting at 1.
    '''
    seen = ""  # underline characters of the currently open levels
    sections = []
    for block in blocks:
        if block['type'] != 'section':
            if not sections:
                # add an initial empty section
                sections = [('', 0, [])]
            sections[-1][2].append(block)
            continue

        mark = block['underline']
        depth = seen.find(mark)
        if depth < 0:
            # First use of this underline: it opens a deeper level.
            depth = len(seen)
            seen += mark
        level = depth + 1
        # Forget the levels nested below this one.
        seen = seen[:level]
        sections.append((block['lines'][0], level, [block]))
    return sections
468 487
if __name__ == "__main__":
    # Debugging aid: read text from stdin, run it through the parsing
    # passes one at a time while dumping the intermediate results, then
    # print the text formatted for a width of 30. Command-line arguments
    # are the container types to keep.
    from pprint import pprint

    def debug(func, *args):
        """Call func(*args), dump its result and return it."""
        result = func(*args)
        print("*** after %s:" % func.__name__)
        pprint(result)
        print('')
        return result

    text = sys.stdin.read()
    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    blocks, pruned = debug(prunecontainers, blocks, sys.argv[1:])
    for step in (inlineliterals, splitparagraphs, updatefieldlists,
                 updateoptionlists, findsections, addmargins,
                 prunecomments, findadmonitions):
        blocks = debug(step, blocks)
    print('\n'.join([formatblock(b, 30) for b in blocks]))
General Comments 0
You need to be logged in to leave comments. Login now