##// END OF EJS Templates
minirst: small code cleanup
Martin Geisler -
r12620:9a9312e8 default
parent child Browse files
Show More
@@ -1,444 +1,441 b''
1 1 # minirst.py - minimal reStructuredText parser
2 2 #
3 3 # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """simplified reStructuredText parser.
9 9
10 10 This parser knows just enough about reStructuredText to parse the
11 11 Mercurial docstrings.
12 12
13 13 It cheats in a major way: nested blocks are not really nested. They
14 14 are just indented blocks that look like they are nested. This relies
15 15 on the user to keep the right indentation for the blocks.
16 16
17 17 It only supports a small subset of reStructuredText:
18 18
19 19 - sections
20 20
21 21 - paragraphs
22 22
23 23 - literal blocks
24 24
25 25 - definition lists
26 26
27 27 - specific admonitions
28 28
29 29 - bullet lists (items must start with '-')
30 30
31 31 - enumerated lists (no autonumbering)
32 32
33 33 - field lists (colons cannot be escaped)
34 34
35 35 - option lists (supports only long options without arguments)
36 36
37 37 - inline literals (no other inline markup is recognized)
38 38 """
39 39
40 40 import re, sys
41 41 import util, encoding
42 42 from i18n import _
43 43
44 44
def replace(text, substs):
    """Apply a sequence of substring replacements to an encoded string.

    text is decoded with encoding.encoding, each (old, new) pair in
    substs is applied in order with unicode.replace, and the result is
    encoded back with the same encoding before being returned.
    """
    # Work on the decoded form so replacements operate on characters
    # rather than on raw bytes of a multi-byte encoding.
    utext = text.decode(encoding.encoding)
    for old, new in substs:
        utext = utext.replace(old, new)
    return utext.encode(encoding.encoding)
50 50
def findblocks(text):
    """Find continuous blocks of lines in text.

    Returns a list of dictionaries representing the blocks. Each block
    has an 'indent' field (the smallest number of leading whitespace
    characters on any of its lines) and a 'lines' field (the lines with
    that common indentation removed). Blocks are separated by one or
    more blank lines; text without any non-blank line gives [].
    """
    blocks = [[]]
    for line in text.splitlines():
        if line.strip():
            blocks[-1].append(line)
        elif blocks[-1]:
            # A blank line closes the current block. Consecutive blank
            # lines do not create additional empty blocks.
            blocks.append([])
    if not blocks[-1]:
        # Drop the empty block left over from trailing blank lines (or
        # from completely empty input).
        del blocks[-1]

    for i, block in enumerate(blocks):
        indent = min((len(l) - len(l.lstrip())) for l in block)
        blocks[i] = dict(indent=indent, lines=[l[indent:] for l in block])
    return blocks
71 71
72 72
def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'. The list is modified in place and also
    returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a paragraph whose last line ends with "::" and
        # that is followed by one or more blocks indented deeper than
        # the paragraph itself; those following blocks are the literal
        # blocks.
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block
                del blocks[i]
                i -= 1
            elif blocks[i]['lines'][-1].endswith(' ::'):
                # Partially minimized form: remove space and both
                # colons.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
            else:
                # Fully minimized form: remove just one colon.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            #
            # When a lone "::" was the very first block, i is now -1 and
            # there is no preceding block to inspect; without this guard
            # blocks[-1] would silently pick the *last* block instead.
            if i >= 0:
                m = _bulletre.match(blocks[i]['lines'][0])
                if m:
                    indent += m.end()
                    adjustment -= m.end()

            # Mark the following indented blocks.
            while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                blocks[i + 1]['type'] = 'literal'
                blocks[i + 1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks
123 123
# Regular expressions recognising the first line of each kind of list
# item. _bulletre also matches enumerators ("1.", "(a)", "b)") and the
# "| " marker.
_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')
_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')
_definitionre = re.compile(r'[^ ]')

def splitparagraphs(blocks):
    """Split paragraphs into lists.

    Every block of type 'paragraph' whose first line looks like a list
    item is replaced, in place, by one block per item. The new blocks
    have the list type as 'type', the item's lines as 'lines' and the
    indentation of the original paragraph as 'indent'. Returns blocks.
    """
    # Tuples with (list type, item regexp, single line items?). Order
    # matters: definition lists has the least specific regexp and must
    # come last.
    listtypes = [('bullet', _bulletre, True),
                 ('option', _optionre, True),
                 ('field', _fieldre, True),
                 ('definition', _definitionre, False)]

    def match(lines, i, itemre, singleline):
        """Does itemre match an item at line i?

        A list item can be followed by an indented line or another list
        item (but only if singleline is True).
        """
        line1 = lines[i]
        if i + 1 < len(lines):
            line2 = lines[i + 1]
        else:
            line2 = ''
        if not itemre.match(line1):
            return False
        if singleline:
            return line2 == '' or line2[0] == ' ' or itemre.match(line2)
        else:
            return line2.startswith(' ')

    i = 0
    while i < len(blocks):
        if blocks[i]['type'] == 'paragraph':
            lines = blocks[i]['lines']
            for listtype, itemre, singleline in listtypes:
                if match(lines, 0, itemre, singleline):
                    items = []
                    for j, line in enumerate(lines):
                        # A line that starts a new item opens a new
                        # block; any other line continues the current
                        # item.
                        if match(lines, j, itemre, singleline):
                            items.append(dict(type=listtype, lines=[],
                                              indent=blocks[i]['indent']))
                        items[-1]['lines'].append(line)
                    blocks[i:i + 1] = items
                    break
        i += 1
    return blocks
170 170
171 171
172 172 _fieldwidth = 12
173 173
174 174 def updatefieldlists(blocks):
175 175 """Find key and maximum key width for field lists."""
176 176 i = 0
177 177 while i < len(blocks):
178 178 if blocks[i]['type'] != 'field':
179 179 i += 1
180 180 continue
181 181
182 182 keywidth = 0
183 183 j = i
184 184 while j < len(blocks) and blocks[j]['type'] == 'field':
185 185 m = _fieldre.match(blocks[j]['lines'][0])
186 186 key, rest = m.groups()
187 187 blocks[j]['lines'][0] = rest
188 188 blocks[j]['key'] = key
189 189 keywidth = max(keywidth, len(key))
190 190 j += 1
191 191
192 192 for block in blocks[i:j]:
193 193 block['keywidth'] = keywidth
194 194 i = j + 1
195 195
196 196 return blocks
197 197
198 198
def prunecontainers(blocks, keep):
    """Prune unwanted containers.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A container is a paragraph whose first line starts with
    ".. container::" followed by blocks that are indented deeper than
    that paragraph. The directive block itself is always removed. If
    the container type (the text after ".. container:: ") is in keep,
    the contained blocks are kept and dedented; otherwise they are
    removed.

    Returns a (blocks, pruned) tuple where pruned lists the types of
    the containers that were removed, in the order they were found.
    The blocks list is modified in place.
    """
    pruned = []
    i = 0
    # The directive needs at least one following block, so the last
    # block is never inspected.
    while i + 1 < len(blocks):
        if (blocks[i]['type'] == 'paragraph' and
            blocks[i]['lines'][0].startswith('.. container::')):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent
            # Skip the 14 characters of ".. container::" plus the
            # separating space.
            containertype = blocks[i]['lines'][0][15:]
            prune = containertype not in keep
            if prune:
                pruned.append(containertype)

            # Always delete "..container:: type" block
            del blocks[i]
            j = i
            while j < len(blocks) and blocks[j]['indent'] > indent:
                if prune:
                    del blocks[j]
                else:
                    blocks[j]['indent'] -= adjustment
                    j += 1
            # Whatever now sits at index i has not been inspected yet:
            # it is either the block following a pruned container or
            # the first (dedented) child of a kept one, which may itself
            # be a nested container. Re-examine it instead of advancing;
            # this also keeps the index from ever becoming negative.
            continue
        i += 1
    return blocks, pruned
236 236
237 237
# A section underline: one punctuation character repeated at least twice.
_sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")

def findsections(blocks):
    """Finds sections.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A two-line paragraph whose second line is an underline of exactly
    the same length as the first line becomes a block of type
    'section': the underline character is stored under 'underline' and
    the underline itself is removed from 'lines'. The blocks are
    modified in place and returned.
    """
    for block in blocks:
        # Searching for a block that looks like this:
        #
        #   Section title
        #   -------------
        if (block['type'] == 'paragraph' and
            len(block['lines']) == 2 and
            len(block['lines'][0]) == len(block['lines'][1]) and
            _sectionre.match(block['lines'][1])):
            block['underline'] = block['lines'][1][0]
            block['type'] = 'section'
            del block['lines'][1]
    return blocks
261 261
262 262
def inlineliterals(blocks):
    """Replace the inline literal markup `` with a double quote.

    Only the lines of 'paragraph' and 'section' blocks are rewritten;
    all other blocks are left untouched. Returns blocks.
    """
    substs = [('``', '"')]
    for b in blocks:
        if b['type'] in ('paragraph', 'section'):
            b['lines'] = [replace(l, substs) for l in b['lines']]
    return blocks
269 269
270 270
def hgrole(blocks):
    """Expand the :hg:`command` role into "hg command".

    Only the lines of 'paragraph' and 'section' blocks are rewritten;
    all other blocks are left untouched. Returns blocks.
    """
    substs = [(':hg:`', '"hg '), ('`', '"')]
    for b in blocks:
        if b['type'] in ('paragraph', 'section'):
            # Turn :hg:`command` into "hg command". This also works
            # when there is a line break in the command and relies on
            # the fact that we have no stray back-quotes in the input
            # (run the blocks through inlineliterals first).
            b['lines'] = [replace(l, substs) for l in b['lines']]
    return blocks
281 281
282 282
def addmargins(blocks):
    """Adds empty blocks for vertical spacing.

    This groups bullets, options, and fields together with no vertical
    space between them, and adds an empty block of type 'margin'
    between all other blocks. The list is modified in place and
    returned.
    """
    i = 1
    while i < len(blocks):
        if (blocks[i]['type'] == blocks[i - 1]['type'] and
            blocks[i]['type'] in ('bullet', 'option', 'field')):
            i += 1
        else:
            blocks.insert(i, dict(lines=[''], indent=0, type='margin'))
            # Step over both the new margin and the block it precedes.
            i += 2
    return blocks
298 298
# Matches the start of an admonition directive such as ".. note::".
_admonitionre = re.compile(r"\.\. (admonition|attention|caution|danger|"
                           r"error|hint|important|note|tip|warning)::",
                           flags=re.IGNORECASE)

def findadmonitions(blocks):
    """
    Makes the type of the block an admonition block if
    the first line is an admonition directive

    The lower-cased directive name is stored under 'admonitiontitle'
    and the directive line is removed from 'lines'; any text following
    the directive on that line is kept as the first line of the block.
    The blocks are modified in place and returned.
    """
    for block in blocks:
        directive = block['lines'][0]
        m = _admonitionre.match(directive)
        if not m:
            continue

        block['type'] = 'admonition'
        # Strip the leading ".. " and the trailing "::".
        block['admonitiontitle'] = directive[3:m.end() - 2].lower()

        # Text on the directive line itself, after "::" and one
        # separator character.
        firstline = directive[m.end() + 1:]
        if firstline:
            # NOTE(review): the listing this code was recovered from
            # collapses runs of spaces, so the indentation literal may
            # originally have been wider -- confirm against the
            # repository.
            block['lines'].insert(1, ' ' + firstline)

        del block['lines'][0]
    return blocks
326 323
def formatblock(block, width):
    """Format a block according to width.

    block is a dictionary with at least 'type', 'indent' and 'lines'
    fields (plus 'underline', 'admonitiontitle', 'key' and 'keywidth'
    for the block types that use them). A width of zero or less is
    replaced by 78. Returns the formatted text as a string.

    For 'bullet' and 'field' blocks the first entry of block['lines']
    may be rewritten in place.
    """
    if width <= 0:
        width = 78
    indent = ' ' * block['indent']
    if block['type'] == 'admonition':
        titles = {'attention': _('Attention:'),
                  'caution': _('Caution:'),
                  'danger': _('!Danger!') ,
                  'error': _('Error:'),
                  'hint': _('Hint:'),
                  'important': _('Important:'),
                  'note': _('Note:'),
                  'tip': _('Tip:'),
                  'warning': _('Warning!')}

        # NOTE(review): there is no entry for the generic 'admonition'
        # directive, so a block with that title raises KeyError here --
        # confirm the intended rendering before adding one.
        admonition = titles[block['admonitiontitle']]
        # The body is indented like the last line of the block.
        hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())

        defindent = indent + hang * ' '
        text = ' '.join(map(str.strip, block['lines']))
        return '%s\n%s' % (indent + admonition, util.wrap(text, width=width,
                                                          initindent=defindent,
                                                          hangindent=defindent))
    if block['type'] == 'margin':
        return ''
    if block['type'] == 'literal':
        # NOTE(review): the listing this code was recovered from
        # collapses runs of spaces, so this extra indentation may
        # originally have been wider -- confirm against the repository.
        indent += ' '
        return indent + ('\n' + indent).join(block['lines'])
    if block['type'] == 'section':
        underline = len(block['lines'][0]) * block['underline']
        return "%s%s\n%s%s" % (indent, block['lines'][0],indent, underline)
    if block['type'] == 'definition':
        term = indent + block['lines'][0]
        # The definition is indented like the last line of the block.
        hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())
        defindent = indent + hang * ' '
        text = ' '.join(map(str.strip, block['lines'][1:]))
        return '%s\n%s' % (term, util.wrap(text, width=width,
                                           initindent=defindent,
                                           hangindent=defindent))
    subindent = indent
    if block['type'] == 'bullet':
        if block['lines'][0].startswith('| '):
            # Remove bullet for line blocks and add no extra
            # indention.
            block['lines'][0] = block['lines'][0][2:]
        else:
            # Continuation lines line up with the text after the bullet.
            m = _bulletre.match(block['lines'][0])
            subindent = indent + m.end() * ' '
    elif block['type'] == 'field':
        keywidth = block['keywidth']
        key = block['key']

        subindent = indent + _fieldwidth * ' '
        if len(key) + 2 > _fieldwidth:
            # key too large, use full line width
            key = key.ljust(width)
        elif keywidth + 2 < _fieldwidth:
            # all keys are small, add only two spaces
            key = key.ljust(keywidth + 2)
            subindent = indent + (keywidth + 2) * ' '
        else:
            # mixed sizes, use fieldwidth for this one
            key = key.ljust(_fieldwidth)
        block['lines'][0] = key + block['lines'][0]
    elif block['type'] == 'option':
        # Continuation lines line up with the description that follows
        # the option name and its argument.
        m = _optionre.match(block['lines'][0])
        option, arg = m.group(1, 2)
        subindent = indent + (len(option) + len(arg)) * ' '

    text = ' '.join(map(str.strip, block['lines']))
    return util.wrap(text, width=width,
                     initindent=indent,
                     hangindent=subindent)
401 398
402 399
def format(text, width, indent=0, keep=None):
    """Parse and format the text according to width.

    indent is added to the indentation of every block found in text.
    keep is passed to prunecontainers as the list of container types to
    retain (an empty list when keep is None).

    Returns the formatted text when keep is None; otherwise returns a
    (text, pruned) tuple where pruned is the list of container types
    reported by prunecontainers.
    """
    blocks = findblocks(text)
    for b in blocks:
        b['indent'] += indent
    # Each pass below relies on the fields set up by the ones before it.
    blocks = findliteralblocks(blocks)
    blocks, pruned = prunecontainers(blocks, keep or [])
    blocks = findsections(blocks)
    blocks = inlineliterals(blocks)
    blocks = hgrole(blocks)
    blocks = splitparagraphs(blocks)
    blocks = updatefieldlists(blocks)
    blocks = addmargins(blocks)
    blocks = findadmonitions(blocks)
    text = '\n'.join(formatblock(b, width) for b in blocks)
    if keep is None:
        return text
    else:
        return text, pruned
422 419
423 420
# Debugging aid: run the parsing passes one at a time on the file named
# by the first command line argument, dumping the block list after each
# pass. Remaining arguments are the container types to keep.
if __name__ == "__main__":
    from pprint import pprint

    def debug(func, *args):
        """Call func(*args), pretty-print its result and return it."""
        blocks = func(*args)
        # Single-argument print(...) behaves the same as a statement
        # and as a function call.
        print("*** after %s:" % func.__name__)
        pprint(blocks)
        print("")
        return blocks

    fp = open(sys.argv[1])
    try:
        text = fp.read()
    finally:
        fp.close()
    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    blocks, pruned = debug(prunecontainers, blocks, sys.argv[2:])
    blocks = debug(inlineliterals, blocks)
    blocks = debug(splitparagraphs, blocks)
    blocks = debug(updatefieldlists, blocks)
    blocks = debug(findsections, blocks)
    blocks = debug(addmargins, blocks)
    blocks = debug(findadmonitions, blocks)
    print('\n'.join(formatblock(b, 30) for b in blocks))
General Comments 0
You need to be logged in to leave comments. Login now