##// END OF EJS Templates
minirst: link to HelpStyleGuide in docstring
Martin Geisler -
r12958:8957c398 default
parent child Browse files
Show More
@@ -1,450 +1,431 b''
1 1 # minirst.py - minimal reStructuredText parser
2 2 #
3 3 # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """simplified reStructuredText parser.
9 9
10 10 This parser knows just enough about reStructuredText to parse the
11 11 Mercurial docstrings.
12 12
13 13 It cheats in a major way: nested blocks are not really nested. They
14 14 are just indented blocks that look like they are nested. This relies
15 15 on the user to keep the right indentation for the blocks.
16 16
17 It only supports a small subset of reStructuredText:
18
19 - sections
20
21 - paragraphs
22
23 - literal blocks
24
25 - definition lists
26
27 - specific admonitions
28
29 - bullet lists (items must start with '-')
30
31 - enumerated lists (no autonumbering)
32
33 - field lists (colons cannot be escaped)
34
35 - option lists (supports only long options without arguments)
36
37 - inline literals (no other inline markup is recognized)
17 Remember to update http://mercurial.selenic.com/wiki/HelpStyleGuide
18 when adding support for new constructs.
38 19 """
39 20
40 21 import re, sys
41 22 import util, encoding
42 23 from i18n import _
43 24
44 25
def replace(text, substs):
    """Apply a sequence of substitutions to an encoded string.

    text is decoded with encoding.encoding, every (old, new) pair in
    substs is applied in order with a plain (non-regexp) replacement,
    and the result is encoded again with the same encoding.
    """
    charset = encoding.encoding
    decoded = text.decode(charset)
    for old, new in substs:
        decoded = decoded.replace(old, new)
    return decoded.encode(charset)
50 31
51 32
# A block separator: a newline followed by one or more lines that
# contain only whitespace.
_blockre = re.compile(r"\n(?:\s*\n)+")

def findblocks(text):
    """Find continuous blocks of lines in text.

    Returns a list of dictionaries representing the blocks. Each block
    has an 'indent' field (the smallest indentation found among its
    lines) and a 'lines' field (the lines with that common indentation
    removed).

    Empty or whitespace-only text yields an empty list.
    """
    blocks = []
    for b in _blockre.split(text.strip()):
        lines = b.splitlines()
        if not lines:
            # Splitting empty text gives a single empty chunk; there is
            # no indentation to compute for it, so skip it instead of
            # calling min() on an empty sequence.
            continue
        indent = min((len(l) - len(l.lstrip())) for l in lines)
        blocks.append(dict(indent=indent, lines=[l[indent:] for l in lines]))
    return blocks
67 48
68 49
def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'.

    The list is modified in place (an expanded "::" marker block is
    removed, the trailing "::" is trimmed from introducing paragraphs
    and the indentation of literal blocks is adjusted) and returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        blocks[i]['type'] = 'paragraph'
        lastline = blocks[i]['lines'][-1]
        if lastline.endswith('::') and i + 1 < len(blocks):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block
                del blocks[i]
                i -= 1
            elif lastline.endswith(' ::'):
                # Partially minimized form: remove space and both
                # colons.
                blocks[i]['lines'][-1] = lastline[:-3]
            else:
                # Fully minimized form: remove just one colon.
                blocks[i]['lines'][-1] = lastline[:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            #
            # When an expanded "::" was the very first block, i is now
            # -1 and there is no preceding block to inspect; indexing
            # with -1 would look at the *last* block of the document
            # instead, so the check is skipped in that case.
            if i >= 0:
                m = _bulletre.match(blocks[i]['lines'][0])
                if m:
                    indent += m.end()
                    adjustment -= m.end()

            # Mark the following indented blocks.
            while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                blocks[i + 1]['type'] = 'literal'
                blocks[i + 1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks
119 100
_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')
# NOTE(review): this copy of the file has its whitespace collapsed;
# confirm the number of spaces before the "+" in the option regexp.
_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')
_definitionre = re.compile(r'[^ ]')

def splitparagraphs(blocks):
    """Split paragraphs into lists.

    Every block of type 'paragraph' whose first line starts a list
    item is replaced, in place, by one block per item. The new blocks
    get the list type ('bullet', 'option', 'field' or 'definition')
    and inherit the indentation of the paragraph. Other blocks are
    left alone. The (modified) list is returned.
    """
    # Tuples with (list type, item regexp, single line items?). Order
    # matters: the definition list regexp is the least specific one
    # and must be tried last.
    listtypes = [('bullet', _bulletre, True),
                 ('option', _optionre, True),
                 ('field', _fieldre, True),
                 ('definition', _definitionre, False)]

    def startsitem(lines, pos, itemre, singleline):
        """Does a list item begin at lines[pos]?

        The line must match itemre and be followed by an indented
        line. When singleline is true, it may instead be the last
        line or be followed directly by another item.
        """
        if not itemre.match(lines[pos]):
            return False
        following = ''
        if pos + 1 < len(lines):
            following = lines[pos + 1]
        if not singleline:
            return following.startswith(' ')
        return (following == '' or following[0] == ' '
                or itemre.match(following))

    def splititems(block):
        """Return the list items found in block, or None."""
        lines = block['lines']
        for listtype, itemre, singleline in listtypes:
            if not startsitem(lines, 0, itemre, singleline):
                continue
            items = []
            for pos, line in enumerate(lines):
                if startsitem(lines, pos, itemre, singleline):
                    items.append(dict(type=listtype, lines=[],
                                      indent=block['indent']))
                # Continuation lines belong to the most recent item.
                items[-1]['lines'].append(line)
            return items
        return None

    index = 0
    while index < len(blocks):
        if blocks[index]['type'] == 'paragraph':
            items = splititems(blocks[index])
            if items is not None:
                blocks[index:index + 1] = items
        index += 1
    return blocks
166 147
167 148
# Column width used by formatblock when laying out field list keys.
_fieldwidth = 12

def updatefieldlists(blocks):
    """Find key and maximum key width for field lists.

    For every run of consecutive 'field' blocks, the key is split off
    the first line of each block and stored under 'key', and the
    length of the longest key in the run is stored under 'keywidth'
    on every block of that run. The list is updated in place and
    returned.
    """
    start = 0
    while start < len(blocks):
        if blocks[start]['type'] != 'field':
            start += 1
            continue

        widest = 0
        end = start
        while end < len(blocks) and blocks[end]['type'] == 'field':
            field = blocks[end]
            key, rest = _fieldre.match(field['lines'][0]).groups()
            field['lines'][0] = rest
            field['key'] = key
            widest = max(widest, len(key))
            end += 1

        for field in blocks[start:end]:
            field['keywidth'] = widest
        # blocks[end], if any, is known not to be a field: skip it too.
        start = end + 1

    return blocks
193 174
194 175
def prunecontainers(blocks, keep):
    """Prune unwanted containers.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A container is a paragraph whose first line starts with
    ".. container::" followed by the more indented blocks after it.
    The directive block itself is always removed. If the container
    type is listed in keep, the contained blocks are kept and moved
    to the indentation of the directive; otherwise they are removed.

    Returns a tuple (blocks, pruned) where blocks is the list that was
    passed in (modified in place) and pruned lists the types of the
    containers that were removed.
    """
    pruned = []
    i = 0
    while i + 1 < len(blocks):
        # Searching for a block that looks like this:
        #
        # +-------+---------------------------+
        # | ".. container ::" type            |
        # +---+                               |
        #     | blocks                        |
        #     +-------------------------------+
        if (blocks[i]['type'] == 'paragraph' and
            blocks[i]['lines'][0].startswith('.. container::')):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent
            containertype = blocks[i]['lines'][0][15:]
            prune = containertype not in keep
            if prune:
                pruned.append(containertype)

            # Always delete "..container:: type" block
            del blocks[i]
            j = i
            # The block that followed the directive now sits at index
            # i. Step back once so that the increment at the bottom of
            # the loop examines it next: it may itself be a container
            # (nested in a kept container, or directly following one
            # without contents).
            i -= 1
            while j < len(blocks) and blocks[j]['indent'] > indent:
                if prune:
                    del blocks[j]
                else:
                    blocks[j]['indent'] -= adjustment
                    j += 1
        i += 1
    return blocks, pruned
232 213
233 214
# A section underline: one punctuation character repeated at least
# twice, with nothing else on the line.
_sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")

def findsections(blocks):
    """Finds sections.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A section is a two-line paragraph that looks like this:

      Section title
      -------------

    where the underline is as long as the column width of the title.
    Such a block gets the type 'section', the underline character is
    stored in its 'underline' field and the underline itself is
    removed from its lines.
    """
    for block in blocks:
        if block['type'] != 'paragraph':
            continue
        lines = block['lines']
        if len(lines) != 2:
            continue
        title, underline = lines
        if (encoding.colwidth(title) == len(underline) and
            _sectionre.match(underline)):
            block['underline'] = underline[0]
            block['type'] = 'section'
            del lines[1]
    return blocks
257 238
258 239
def inlineliterals(blocks):
    """Replace the `` markers of inline literals with double quotes.

    Only the lines of 'paragraph' and 'section' blocks are rewritten.
    """
    substs = [('``', '"')]
    for block in blocks:
        if block['type'] not in ('paragraph', 'section'):
            continue
        block['lines'] = [replace(line, substs) for line in block['lines']]
    return blocks
265 246
266 247
def hgrole(blocks):
    """Turn :hg:`command` into "hg command".

    Only the lines of 'paragraph' and 'section' blocks are rewritten.
    """
    # The opening marker is replaced first; every back-quote left
    # after that is treated as a closing one. This also works when
    # there is a line break in the command and relies on the fact that
    # we have no stray back-quotes in the input (run the blocks
    # through inlineliterals first).
    substs = [(':hg:`', '"hg '), ('`', '"')]
    for block in blocks:
        if block['type'] not in ('paragraph', 'section'):
            continue
        block['lines'] = [replace(line, substs) for line in block['lines']]
    return blocks
277 258
278 259
def addmargins(blocks):
    """Adds empty blocks for vertical spacing.

    Consecutive bullet, option or field blocks of the same type are
    kept together with no vertical space between them. An empty block
    of type 'margin' is added between all other neighbouring blocks.
    The list is updated in place and returned.
    """
    grouped = ('bullet', 'option', 'field')
    spaced = blocks[:1]
    for previous, current in zip(blocks, blocks[1:]):
        if (current['type'] != previous['type'] or
            current['type'] not in grouped):
            spaced.append(dict(lines=[''], indent=0, type='margin'))
        spaced.append(current)
    blocks[:] = spaced
    return blocks
294 275
def prunecomments(blocks):
    """Remove comments.

    A comment is a block of type 'paragraph' whose first line starts
    with ".. ". The list is updated in place and returned.
    """
    def iscomment(block):
        return (block['type'] == 'paragraph' and
                block['lines'][0].startswith('.. '))

    blocks[:] = [b for b in blocks if not iscomment(b)]
    return blocks
305 286
_admonitionre = re.compile(r"\.\. (admonition|attention|caution|danger|"
                           r"error|hint|important|note|tip|warning)::",
                           flags=re.IGNORECASE)

def findadmonitions(blocks):
    """
    Makes the type of the block an admonition block if
    the first line is an admonition directive

    The lower-cased directive name is stored in the 'admonitiontitle'
    field of the block. The directive line is removed; any text that
    followed the directive on that line is kept as the first line.
    """
    for block in blocks:
        lines = block['lines']
        directive = lines[0]
        m = _admonitionre.match(directive)
        if not m:
            continue

        block['type'] = 'admonition'
        # The name sits between the leading ".. " and the final "::".
        block['admonitiontitle'] = directive[3:m.end() - 2].lower()

        trailing = directive[m.end() + 1:]
        if trailing:
            # NOTE(review): this copy of the file has its whitespace
            # collapsed; confirm the width of this prefix.
            lines[0] = ' ' + trailing
        else:
            del lines[0]
    return blocks
330 311
# Translated headings for admonition blocks, looked up by formatblock
# with the lower-cased directive name that findadmonitions stores in
# the 'admonitiontitle' field.
# NOTE(review): _admonitionre also accepts "admonition", which has no
# entry here -- confirm whether that directive is meant to be supported.
_admonitiontitles = {
    'attention': _('Attention:'),
    'caution': _('Caution:'),
    'danger': _('!Danger!'),
    'error': _('Error:'),
    'hint': _('Hint:'),
    'important': _('Important:'),
    'note': _('Note:'),
    'tip': _('Tip:'),
    'warning': _('Warning!'),
}
340 321
def formatblock(block, width):
    """Format a block according to width.

    A width of zero or less means 78 columns. The block is indented
    by its 'indent' field and laid out according to its 'type':

    - margin: an empty string
    - literal: the lines as they are, with some extra indentation
    - section: the title followed by an underline of the same width
    - admonition: the translated title, then the wrapped text
    - definition: the term, then the wrapped definition
    - bullet, field, option and everything else: wrapped text with a
      hanging indent that fits the item

    Note that the first line of the block is rewritten in place for
    fields and for line blocks (bullets starting with "| ").
    """
    if width <= 0:
        width = 78
    kind = block['type']
    lines = block['lines']
    indent = ' ' * block['indent']

    def joined(parts):
        return ' '.join(map(str.strip, parts))

    def lastlineindent():
        # The text is wrapped at the indentation of its last line.
        last = lines[-1]
        return indent + (len(last) - len(last.lstrip())) * ' '

    if kind == 'admonition':
        heading = indent + _admonitiontitles[block['admonitiontitle']]
        textindent = lastlineindent()
        return '%s\n%s' % (heading, util.wrap(joined(lines), width=width,
                                              initindent=textindent,
                                              hangindent=textindent))
    if kind == 'margin':
        return ''
    if kind == 'literal':
        # NOTE(review): this copy of the file has its whitespace
        # collapsed; confirm the width of the extra indentation.
        indent += ' '
        return indent + ('\n' + indent).join(lines)
    if kind == 'section':
        title = lines[0]
        underline = encoding.colwidth(title) * block['underline']
        return "%s%s\n%s%s" % (indent, title, indent, underline)
    if kind == 'definition':
        term = indent + lines[0]
        textindent = lastlineindent()
        return '%s\n%s' % (term, util.wrap(joined(lines[1:]), width=width,
                                           initindent=textindent,
                                           hangindent=textindent))

    subindent = indent
    if kind == 'bullet':
        if lines[0].startswith('| '):
            # Remove bullet for line blocks and add no extra
            # indentation.
            lines[0] = lines[0][2:]
        else:
            # Continuation lines are aligned with the text that
            # follows the bullet.
            subindent = indent + _bulletre.match(lines[0]).end() * ' '
    elif kind == 'field':
        key = block['key']
        keywidth = block['keywidth']
        if len(key) + 2 > _fieldwidth:
            # key too large, use full line width
            padding = width
            hang = _fieldwidth
        elif keywidth + 2 < _fieldwidth:
            # all keys are small, add only two spaces
            padding = hang = keywidth + 2
        else:
            # mixed sizes, use fieldwidth for this one
            padding = hang = _fieldwidth
        subindent = indent + hang * ' '
        lines[0] = key.ljust(padding) + lines[0]
    elif kind == 'option':
        option, arg, rest = _optionre.match(lines[0]).groups()
        subindent = indent + (len(option) + len(arg)) * ' '

    return util.wrap(joined(lines), width=width,
                     initindent=indent,
                     hangindent=subindent)
405 386
406 387
def format(text, width, indent=0, keep=None):
    """Parse and format the text according to width.

    indent is added to the indentation of every block found in text.
    keep lists the container types to retain; the blocks of all other
    containers are dropped.

    Returns the formatted text. If keep is given (not None), a tuple
    with the formatted text and the list of pruned container types is
    returned instead.
    """
    blocks = findblocks(text)
    for block in blocks:
        block['indent'] += indent
    blocks, pruned = prunecontainers(findliteralblocks(blocks), keep or [])
    # The order of the remaining passes matters: each one relies on
    # the block types assigned by the passes before it.
    for step in (findsections, inlineliterals, hgrole, splitparagraphs,
                 updatefieldlists, prunecomments, addmargins,
                 findadmonitions):
        blocks = step(blocks)
    formatted = '\n'.join(formatblock(b, width) for b in blocks)
    if keep is None:
        return formatted
    return formatted, pruned
427 408
428 409
429 410 if __name__ == "__main__":
430 411 from pprint import pprint
431 412
432 413 def debug(func, *args):
433 414 blocks = func(*args)
434 415 print "*** after %s:" % func.__name__
435 416 pprint(blocks)
436 417 print
437 418 return blocks
438 419
439 420 text = open(sys.argv[1]).read()
440 421 blocks = debug(findblocks, text)
441 422 blocks = debug(findliteralblocks, blocks)
442 423 blocks, pruned = debug(prunecontainers, blocks, sys.argv[2:])
443 424 blocks = debug(inlineliterals, blocks)
444 425 blocks = debug(splitparagraphs, blocks)
445 426 blocks = debug(updatefieldlists, blocks)
446 427 blocks = debug(findsections, blocks)
447 428 blocks = debug(prunecomments, blocks)
448 429 blocks = debug(addmargins, blocks)
449 430 blocks = debug(findadmonitions, blocks)
450 431 print '\n'.join(formatblock(b, 30) for b in blocks)
General Comments 0
You need to be logged in to leave comments. Login now