##// END OF EJS Templates
minirst: better interaction between comments and margins...
Martin Geisler -
r13003:876a931d default
parent child Browse files
Show More
@@ -1,431 +1,433 b''
1 1 # minirst.py - minimal reStructuredText parser
2 2 #
3 3 # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """simplified reStructuredText parser.
9 9
10 10 This parser knows just enough about reStructuredText to parse the
11 11 Mercurial docstrings.
12 12
13 13 It cheats in a major way: nested blocks are not really nested. They
14 14 are just indented blocks that look like they are nested. This relies
15 15 on the user to keep the right indentation for the blocks.
16 16
17 17 Remember to update http://mercurial.selenic.com/wiki/HelpStyleGuide
18 18 when adding support for new constructs.
19 19 """
20 20
21 21 import re, sys
22 22 import util, encoding
23 23 from i18n import _
24 24
25 25
26 26 def replace(text, substs):
27 27 utext = text.decode(encoding.encoding)
28 28 for f, t in substs:
29 29 utext = utext.replace(f, t)
30 30 return utext.encode(encoding.encoding)
31 31
32 32
_blockre = re.compile(r"\n(?:\s*\n)+")

def findblocks(text):
    """Find continuous blocks of lines in text.

    Blocks are separated by one or more blank (whitespace-only) lines.

    Returns a list of dictionaries representing the blocks. Each block
    has an 'indent' field (the smallest leading whitespace width found
    among its lines) and a 'lines' field (the lines with that common
    indentation removed). Empty or whitespace-only text gives an empty
    list.
    """
    blocks = []
    # Only strip trailing whitespace: stripping the front as well would
    # remove the indentation of the very first line and make the first
    # block look less indented than it really is.
    for chunk in _blockre.split(text.rstrip()):
        # Blank lines can only survive in the leading chunk (anywhere
        # else they are part of a separator); drop them so they do not
        # take part in the indentation computation.
        lines = [l for l in chunk.splitlines() if l.strip()]
        if not lines:
            # Nothing left (e.g. empty input); min() below would raise
            # ValueError on an empty sequence.
            continue
        indent = min((len(l) - len(l.lstrip())) for l in lines)
        lines = [l[indent:] for l in lines]
        blocks.append(dict(indent=indent, lines=lines))
    return blocks
48 48
49 49
50 50 def findliteralblocks(blocks):
51 51 """Finds literal blocks and adds a 'type' field to the blocks.
52 52
53 53 Literal blocks are given the type 'literal', all other blocks are
54 54 given the type 'paragraph'.
55 55 """
56 56 i = 0
57 57 while i < len(blocks):
58 58 # Searching for a block that looks like this:
59 59 #
60 60 # +------------------------------+
61 61 # | paragraph |
62 62 # | (ends with "::") |
63 63 # +------------------------------+
64 64 # +---------------------------+
65 65 # | indented literal block |
66 66 # +---------------------------+
67 67 blocks[i]['type'] = 'paragraph'
68 68 if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):
69 69 indent = blocks[i]['indent']
70 70 adjustment = blocks[i + 1]['indent'] - indent
71 71
72 72 if blocks[i]['lines'] == ['::']:
73 73 # Expanded form: remove block
74 74 del blocks[i]
75 75 i -= 1
76 76 elif blocks[i]['lines'][-1].endswith(' ::'):
77 77 # Partially minimized form: remove space and both
78 78 # colons.
79 79 blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
80 80 else:
81 81 # Fully minimized form: remove just one colon.
82 82 blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]
83 83
84 84 # List items are formatted with a hanging indent. We must
85 85 # correct for this here while we still have the original
86 86 # information on the indentation of the subsequent literal
87 87 # blocks available.
88 88 m = _bulletre.match(blocks[i]['lines'][0])
89 89 if m:
90 90 indent += m.end()
91 91 adjustment -= m.end()
92 92
93 93 # Mark the following indented blocks.
94 94 while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
95 95 blocks[i + 1]['type'] = 'literal'
96 96 blocks[i + 1]['indent'] -= adjustment
97 97 i += 1
98 98 i += 1
99 99 return blocks
100 100
101 101 _bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')
102 102 _optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
103 103 _fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')
104 104 _definitionre = re.compile(r'[^ ]')
105 105
106 106 def splitparagraphs(blocks):
107 107 """Split paragraphs into lists."""
108 108 # Tuples with (list type, item regexp, single line items?). Order
109 109 # matters: definition lists have the least specific regexp and must
110 110 # come last.
111 111 listtypes = [('bullet', _bulletre, True),
112 112 ('option', _optionre, True),
113 113 ('field', _fieldre, True),
114 114 ('definition', _definitionre, False)]
115 115
116 116 def match(lines, i, itemre, singleline):
117 117 """Does itemre match an item at line i?
118 118
119 119 A list item can be followed by an indented line or another list
120 120 item (but only if singleline is True).
121 121 """
122 122 line1 = lines[i]
123 123 line2 = i + 1 < len(lines) and lines[i + 1] or ''
124 124 if not itemre.match(line1):
125 125 return False
126 126 if singleline:
127 127 return line2 == '' or line2[0] == ' ' or itemre.match(line2)
128 128 else:
129 129 return line2.startswith(' ')
130 130
131 131 i = 0
132 132 while i < len(blocks):
133 133 if blocks[i]['type'] == 'paragraph':
134 134 lines = blocks[i]['lines']
135 135 for type, itemre, singleline in listtypes:
136 136 if match(lines, 0, itemre, singleline):
137 137 items = []
138 138 for j, line in enumerate(lines):
139 139 if match(lines, j, itemre, singleline):
140 140 items.append(dict(type=type, lines=[],
141 141 indent=blocks[i]['indent']))
142 142 items[-1]['lines'].append(line)
143 143 blocks[i:i + 1] = items
144 144 break
145 145 i += 1
146 146 return blocks
147 147
148 148
149 149 _fieldwidth = 12
150 150
151 151 def updatefieldlists(blocks):
152 152 """Find key and maximum key width for field lists."""
153 153 i = 0
154 154 while i < len(blocks):
155 155 if blocks[i]['type'] != 'field':
156 156 i += 1
157 157 continue
158 158
159 159 keywidth = 0
160 160 j = i
161 161 while j < len(blocks) and blocks[j]['type'] == 'field':
162 162 m = _fieldre.match(blocks[j]['lines'][0])
163 163 key, rest = m.groups()
164 164 blocks[j]['lines'][0] = rest
165 165 blocks[j]['key'] = key
166 166 keywidth = max(keywidth, len(key))
167 167 j += 1
168 168
169 169 for block in blocks[i:j]:
170 170 block['keywidth'] = keywidth
171 171 i = j + 1
172 172
173 173 return blocks
174 174
175 175
def prunecontainers(blocks, keep):
    """Prune unwanted containers.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    Every ".. container:: type" directive block is removed. The blocks
    that follow it with a larger indentation than the directive are
    deleted when the type is not listed in keep; otherwise they are
    shifted left by the distance between the directive and the block
    right after it.

    Returns a (blocks, pruned) tuple: blocks is the input list, which
    is modified in place, and pruned is the list of container types
    that were removed.
    """
    pruned = []
    i = 0
    while i + 1 < len(blocks):
        # Searching for a block that looks like this:
        #
        # +-------+---------------------------+
        # | ".. container ::" type            |
        # +---+                               |
        #     | blocks                        |
        #     +-------------------------------+
        if (blocks[i]['type'] == 'paragraph' and
            blocks[i]['lines'][0].startswith('.. container::')):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent
            # Skip the 14 characters of ".. container::" and the
            # separating space.
            containertype = blocks[i]['lines'][0][15:]
            prune = containertype not in keep
            if prune:
                pruned.append(containertype)

            # Always delete ".. container:: type" block
            del blocks[i]
            j = i
            # The block that followed the directive now lives at index
            # i. Step back exactly once so that the "i += 1" below makes
            # the outer loop look at that index again; this is what lets
            # a nested container directive be processed, and it keeps i
            # from running backwards when several blocks are pruned.
            i -= 1
            while j < len(blocks) and blocks[j]['indent'] > indent:
                if prune:
                    del blocks[j]
                else:
                    blocks[j]['indent'] -= adjustment
                    j += 1
        i += 1
    return blocks, pruned
213 213
214 214
215 215 _sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")
216 216
217 217 def findsections(blocks):
218 218 """Finds sections.
219 219
220 220 The blocks must have a 'type' field, i.e., they should have been
221 221 run through findliteralblocks first.
222 222 """
223 223 for block in blocks:
224 224 # Searching for a block that looks like this:
225 225 #
226 226 # +------------------------------+
227 227 # | Section title |
228 228 # | ------------- |
229 229 # +------------------------------+
230 230 if (block['type'] == 'paragraph' and
231 231 len(block['lines']) == 2 and
232 232 encoding.colwidth(block['lines'][0]) == len(block['lines'][1]) and
233 233 _sectionre.match(block['lines'][1])):
234 234 block['underline'] = block['lines'][1][0]
235 235 block['type'] = 'section'
236 236 del block['lines'][1]
237 237 return blocks
238 238
239 239
240 240 def inlineliterals(blocks):
241 241 substs = [('``', '"')]
242 242 for b in blocks:
243 243 if b['type'] in ('paragraph', 'section'):
244 244 b['lines'] = [replace(l, substs) for l in b['lines']]
245 245 return blocks
246 246
247 247
248 248 def hgrole(blocks):
249 249 substs = [(':hg:`', '"hg '), ('`', '"')]
250 250 for b in blocks:
251 251 if b['type'] in ('paragraph', 'section'):
252 252 # Turn :hg:`command` into "hg command". This also works
253 253 # when there is a line break in the command and relies on
254 254 # the fact that we have no stray back-quotes in the input
255 255 # (run the blocks through inlineliterals first).
256 256 b['lines'] = [replace(l, substs) for l in b['lines']]
257 257 return blocks
258 258
259 259
260 260 def addmargins(blocks):
261 261 """Adds empty blocks for vertical spacing.
262 262
263 263 This groups bullets, options, and fields together with no vertical
264 264 space between them, and adds an empty block between all other blocks.
265 265 """
266 266 i = 1
267 267 while i < len(blocks):
268 268 if (blocks[i]['type'] == blocks[i - 1]['type'] and
269 269 blocks[i]['type'] in ('bullet', 'option', 'field')):
270 270 i += 1
271 271 else:
272 272 blocks.insert(i, dict(lines=[''], indent=0, type='margin'))
273 273 i += 2
274 274 return blocks
275 275
def prunecomments(blocks):
    """Remove comments.

    A comment is a 'paragraph' block whose first line starts with
    '.. ', or a block consisting of the single line '..' (an empty
    comment). When the block right after a removed comment is a
    'margin' block, that margin is removed too, so that no doubled
    vertical space is left behind.

    The list is modified in place and returned.
    """
    i = 0
    while i < len(blocks):
        b = blocks[i]
        if b['type'] == 'paragraph' and (b['lines'][0].startswith('.. ') or
                                         b['lines'] == ['..']):
            del blocks[i]
            # Do not advance i: the next block has moved into slot i.
            if i < len(blocks) and blocks[i]['type'] == 'margin':
                del blocks[i]
        else:
            i += 1
    return blocks
286 288
287 289 _admonitionre = re.compile(r"\.\. (admonition|attention|caution|danger|"
288 290 r"error|hint|important|note|tip|warning)::",
289 291 flags=re.IGNORECASE)
290 292
291 293 def findadmonitions(blocks):
292 294 """
293 295 Makes the type of the block an admonition block if
294 296 the first line is an admonition directive
295 297 """
296 298 i = 0
297 299 while i < len(blocks):
298 300 m = _admonitionre.match(blocks[i]['lines'][0])
299 301 if m:
300 302 blocks[i]['type'] = 'admonition'
301 303 admonitiontitle = blocks[i]['lines'][0][3:m.end() - 2].lower()
302 304
303 305 firstline = blocks[i]['lines'][0][m.end() + 1:]
304 306 if firstline:
305 307 blocks[i]['lines'].insert(1, ' ' + firstline)
306 308
307 309 blocks[i]['admonitiontitle'] = admonitiontitle
308 310 del blocks[i]['lines'][0]
309 311 i = i + 1
310 312 return blocks
311 313
312 314 _admonitiontitles = {'attention': _('Attention:'),
313 315 'caution': _('Caution:'),
314 316 'danger': _('!Danger!') ,
315 317 'error': _('Error:'),
316 318 'hint': _('Hint:'),
317 319 'important': _('Important:'),
318 320 'note': _('Note:'),
319 321 'tip': _('Tip:'),
320 322 'warning': _('Warning!')}
321 323
322 324 def formatblock(block, width):
323 325 """Format a block according to width."""
324 326 if width <= 0:
325 327 width = 78
326 328 indent = ' ' * block['indent']
327 329 if block['type'] == 'admonition':
328 330 admonition = _admonitiontitles[block['admonitiontitle']]
329 331 hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())
330 332
331 333 defindent = indent + hang * ' '
332 334 text = ' '.join(map(str.strip, block['lines']))
333 335 return '%s\n%s' % (indent + admonition, util.wrap(text, width=width,
334 336 initindent=defindent,
335 337 hangindent=defindent))
336 338 if block['type'] == 'margin':
337 339 return ''
338 340 if block['type'] == 'literal':
339 341 indent += ' '
340 342 return indent + ('\n' + indent).join(block['lines'])
341 343 if block['type'] == 'section':
342 344 underline = encoding.colwidth(block['lines'][0]) * block['underline']
343 345 return "%s%s\n%s%s" % (indent, block['lines'][0],indent, underline)
344 346 if block['type'] == 'definition':
345 347 term = indent + block['lines'][0]
346 348 hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())
347 349 defindent = indent + hang * ' '
348 350 text = ' '.join(map(str.strip, block['lines'][1:]))
349 351 return '%s\n%s' % (term, util.wrap(text, width=width,
350 352 initindent=defindent,
351 353 hangindent=defindent))
352 354 subindent = indent
353 355 if block['type'] == 'bullet':
354 356 if block['lines'][0].startswith('| '):
355 357 # Remove bullet for line blocks and add no extra
356 358 # indention.
357 359 block['lines'][0] = block['lines'][0][2:]
358 360 else:
359 361 m = _bulletre.match(block['lines'][0])
360 362 subindent = indent + m.end() * ' '
361 363 elif block['type'] == 'field':
362 364 keywidth = block['keywidth']
363 365 key = block['key']
364 366
365 367 subindent = indent + _fieldwidth * ' '
366 368 if len(key) + 2 > _fieldwidth:
367 369 # key too large, use full line width
368 370 key = key.ljust(width)
369 371 elif keywidth + 2 < _fieldwidth:
370 372 # all keys are small, add only two spaces
371 373 key = key.ljust(keywidth + 2)
372 374 subindent = indent + (keywidth + 2) * ' '
373 375 else:
374 376 # mixed sizes, use fieldwidth for this one
375 377 key = key.ljust(_fieldwidth)
376 378 block['lines'][0] = key + block['lines'][0]
377 379 elif block['type'] == 'option':
378 380 m = _optionre.match(block['lines'][0])
379 381 option, arg, rest = m.groups()
380 382 subindent = indent + (len(option) + len(arg)) * ' '
381 383
382 384 text = ' '.join(map(str.strip, block['lines']))
383 385 return util.wrap(text, width=width,
384 386 initindent=indent,
385 387 hangindent=subindent)
386 388
387 389
388 390 def format(text, width, indent=0, keep=None):
389 391 """Parse and format the text according to width."""
390 392 blocks = findblocks(text)
391 393 for b in blocks:
392 394 b['indent'] += indent
393 395 blocks = findliteralblocks(blocks)
394 396 blocks, pruned = prunecontainers(blocks, keep or [])
395 397 blocks = findsections(blocks)
396 398 blocks = inlineliterals(blocks)
397 399 blocks = hgrole(blocks)
398 400 blocks = splitparagraphs(blocks)
399 401 blocks = updatefieldlists(blocks)
402 blocks = addmargins(blocks)
400 403 blocks = prunecomments(blocks)
401 blocks = addmargins(blocks)
402 404 blocks = findadmonitions(blocks)
403 405 text = '\n'.join(formatblock(b, width) for b in blocks)
404 406 if keep is None:
405 407 return text
406 408 else:
407 409 return text, pruned
408 410
409 411
410 412 if __name__ == "__main__":
411 413 from pprint import pprint
412 414
413 415 def debug(func, *args):
414 416 blocks = func(*args)
415 417 print "*** after %s:" % func.__name__
416 418 pprint(blocks)
417 419 print
418 420 return blocks
419 421
420 422 text = open(sys.argv[1]).read()
421 423 blocks = debug(findblocks, text)
422 424 blocks = debug(findliteralblocks, blocks)
423 425 blocks, pruned = debug(prunecontainers, blocks, sys.argv[2:])
424 426 blocks = debug(inlineliterals, blocks)
425 427 blocks = debug(splitparagraphs, blocks)
426 428 blocks = debug(updatefieldlists, blocks)
427 429 blocks = debug(findsections, blocks)
430 blocks = debug(addmargins, blocks)
428 431 blocks = debug(prunecomments, blocks)
429 blocks = debug(addmargins, blocks)
430 432 blocks = debug(findadmonitions, blocks)
431 433 print '\n'.join(formatblock(b, 30) for b in blocks)
General Comments 0
You need to be logged in to leave comments. Login now