##// END OF EJS Templates
minirst: make encoding.encoding unicodes to pass into encode() and decode()
Pulkit Goyal -
r31318:1c3352d7 default
parent child Browse files
Show More
@@ -1,831 +1,832 b''
1 1 # minirst.py - minimal reStructuredText parser
2 2 #
3 3 # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """simplified reStructuredText parser.
9 9
10 10 This parser knows just enough about reStructuredText to parse the
11 11 Mercurial docstrings.
12 12
13 13 It cheats in a major way: nested blocks are not really nested. They
14 14 are just indented blocks that look like they are nested. This relies
15 15 on the user to keep the right indentation for the blocks.
16 16
17 17 Remember to update https://mercurial-scm.org/wiki/HelpStyleGuide
18 18 when adding support for new constructs.
19 19 """
20 20
21 21 from __future__ import absolute_import
22 22
23 23 import cgi
24 24 import re
25 25
26 26 from .i18n import _
27 27 from . import (
28 28 encoding,
29 pycompat,
29 30 util,
30 31 )
31 32
def section(s):
    """Return ``s`` as a title line underlined with '"' characters.

    The underline is as wide as the displayed width of ``s`` and is
    followed by an empty line.
    """
    underline = '"' * encoding.colwidth(s)
    return "%s\n%s\n\n" % (s, underline)
34 35
def subsection(s):
    """Return ``s`` as a title line underlined with '=' characters.

    The underline is as wide as the displayed width of ``s`` and is
    followed by an empty line.
    """
    underline = '=' * encoding.colwidth(s)
    return "%s\n%s\n\n" % (s, underline)
37 38
def subsubsection(s):
    """Return ``s`` as a title line underlined with '-' characters.

    The underline is as wide as the displayed width of ``s`` and is
    followed by an empty line.
    """
    underline = "-" * encoding.colwidth(s)
    return "%s\n%s\n\n" % (s, underline)
40 41
def subsubsubsection(s):
    """Return ``s`` as a title line underlined with '.' characters.

    The underline is as wide as the displayed width of ``s`` and is
    followed by an empty line.
    """
    underline = "." * encoding.colwidth(s)
    return "%s\n%s\n\n" % (s, underline)
43 44
def replace(text, substs):
    '''
    Apply a list of (find, replace) pairs to a text.

    The text is decoded with the local encoding, every pair is applied in
    order to the decoded text, and the result is encoded back.

    >>> replace("foo bar", [('f', 'F'), ('b', 'B')])
    'Foo Bar'
    >>> encoding.encoding = 'latin1'
    >>> replace('\\x81\\\\', [('\\\\', '/')])
    '\\x81/'
    >>> encoding.encoding = 'shiftjis'
    >>> replace('\\x81\\\\', [('\\\\', '/')])
    '\\x81\\\\'
    '''

    # Some character encodings (cp932 for Japanese, at least) use ASCII
    # characters other than control/alphabet/digit as part of multi-byte
    # characters, so replacing such characters directly on strings in the
    # local encoding would produce invalid byte sequences. Work on the
    # decoded text instead.
    codec = pycompat.sysstr(encoding.encoding)
    decoded = text.decode(codec)
    for old, new in substs:
        decoded = decoded.replace(old.decode("ascii"), new.decode("ascii"))
    return decoded.encode(codec)
66 67
# Block separator: a newline followed by one or more lines that contain
# only whitespace. Used by findblocks() to split text into blocks.
_blockre = re.compile(br"\n(?:\s*\n)+")
68 69
def findblocks(text):
    """Split text into blocks of consecutive lines.

    Leading newlines and trailing whitespace are dropped before the text
    is split on _blockre. The result is a list of dictionaries, one per
    non-empty block, with two fields:

    - 'indent': the smallest leading-whitespace width among the lines
    - 'lines': the lines of the block with that indentation removed
    """
    found = []
    for chunk in _blockre.split(text.lstrip('\n').rstrip()):
        rawlines = chunk.splitlines()
        if not rawlines:
            continue
        indent = min(len(line) - len(line.lstrip()) for line in rawlines)
        found.append({'indent': indent,
                      'lines': [line[indent:] for line in rawlines]})
    return found
83 84
def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'.

    The list is modified in place (a block consisting only of '::' is
    removed, trailing '::' markers are stripped, and the indentation of
    literal blocks is adjusted) and is also returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):
            indent = blocks[i]['indent']
            # how far the following block is indented relative to this one
            adjustment = blocks[i + 1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block
                del blocks[i]
                i -= 1
                # NOTE(review): if the removed block was the first one, i
                # is now -1, so the blocks[i] lookup below reads the last
                # block of the list -- confirm this is intended.
            elif blocks[i]['lines'][-1].endswith(' ::'):
                # Partially minimized form: remove space and both
                # colons.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
            elif len(blocks[i]['lines']) == 1 and \
                 blocks[i]['lines'][0].lstrip(' ').startswith('.. ') and \
                 blocks[i]['lines'][0].find(' ', 3) == -1:
                # directive on its own line, not a literal block
                i += 1
                continue
            else:
                # Fully minimized form: remove just one colon.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            m = _bulletre.match(blocks[i]['lines'][0])
            if m:
                indent += m.end()
                adjustment -= m.end()

            # Mark the following indented blocks.
            while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                blocks[i + 1]['type'] = 'literal'
                blocks[i + 1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks
140 141
# Bullet list marker followed by a space: '*', '-', an alphanumeric run
# ending in '.', an alphanumeric run ending in ')' (optionally opened by
# '('), or '|'.
_bulletre = re.compile(br'(\*|-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')
# Option list item: an optional "-x, " short option (group 2), a "--long"
# option (group 3), the option argument (group 5) and the description
# (group 6).
_optionre = re.compile(br'^(-([a-zA-Z0-9]), )?(--[a-z0-9-]+)'
                       br'((.*) +)(.*)$')
# Field list item ":key: value": group 1 is the key, group 2 the value.
_fieldre = re.compile(br':(?![: ])([^:]*)(?<! ):[ ]+(.*)')
# Definition list term: any line that does not start with a space.
_definitionre = re.compile(br'[^ ]')
# Simple table divider line: runs of '=' separated by whitespace.
_tablere = re.compile(br'(=+\s+)*=+')
147 148
def splitparagraphs(blocks):
    """Split paragraphs that are really lists into one block per item.

    A paragraph whose first line starts a bullet, option, field or
    definition item is replaced in place by a sequence of blocks of that
    list type, all with the indentation of the original paragraph. The
    modified list is returned.
    """
    # Tuples with (list type, item regexp, single line items?). Order
    # matters: definition lists have the least specific regexp and must
    # come last.
    listtypes = [('bullet', _bulletre, True),
                 ('option', _optionre, True),
                 ('field', _fieldre, True),
                 ('definition', _definitionre, False)]

    def match(lines, i, itemre, singleline):
        """Does itemre match an item at line i?

        A list item can be followed by an indented line or another list
        item (but only if singleline is True).
        """
        if not itemre.match(lines[i]):
            return False
        following = lines[i + 1] if i + 1 < len(lines) else ''
        if not singleline:
            return following.startswith(' ')
        return (following == '' or following[0] == ' '
                or itemre.match(following))

    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        if block['type'] == 'paragraph':
            lines = block['lines']
            for kind, itemre, singleline in listtypes:
                if not match(lines, 0, itemre, singleline):
                    continue
                items = []
                for idx, line in enumerate(lines):
                    # a matching line starts a new item; any other line
                    # continues the current one
                    if match(lines, idx, itemre, singleline):
                        items.append({'type': kind, 'lines': [],
                                      'indent': block['indent']})
                    items[-1]['lines'].append(line)
                blocks[pos:pos + 1] = items
                break
        pos += 1
    return blocks
189 190
# Width used by formatblock() for the key column of a field list: keys are
# padded to this width and continuation lines are indented by it.
_fieldwidth = 14
191 192
def updatefieldlists(blocks):
    """Extract the key of every field list item.

    For each block of type 'field', the first line is matched against
    _fieldre; the key is stored in the block's 'key' field and the first
    line is replaced by the remaining text. The list is returned.
    """
    for block in blocks:
        if block['type'] != 'field':
            continue
        key, rest = _fieldre.match(block['lines'][0]).groups()
        block['lines'][0] = rest
        block['key'] = key
    return blocks
211 212
def updateoptionlists(blocks):
    """Normalize option list items.

    For each run of consecutive 'option' blocks, the first line of every
    block is parsed with _optionre. The description replaces that line,
    the rendered option string is stored as 'optstr', and every block of
    the run gets 'optstrwidth' set to the widest option string of the
    run. The list is returned.
    """
    start = 0
    while start < len(blocks):
        if blocks[start]['type'] != 'option':
            start += 1
            continue

        widest = 0
        end = start
        while end < len(blocks) and blocks[end]['type'] == 'option':
            block = blocks[end]
            m = _optionre.match(block['lines'][0])

            shortoption = m.group(2)
            longoption = m.group(3)[2:].strip()
            desc = m.group(6).strip()
            longoptionarg = m.group(5).strip()
            block['lines'][0] = desc

            # without a short option, padding takes its place
            if shortoption:
                prefix = "-%s " % shortoption
            else:
                prefix = ' '
            optstr = "%s--%s %s" % (prefix, longoption, longoptionarg)
            optstr = optstr.rstrip()

            block['optstr'] = optstr
            widest = max(widest, encoding.colwidth(optstr))
            end += 1

        for block in blocks[start:end]:
            block['optstrwidth'] = widest
        # blocks[end] is known not to be an option, so it can be skipped
        start = end + 1
    return blocks
247 248
def prunecontainers(blocks, keep):
    """Remove container directives and, unless kept, their contents.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    ``keep`` lists the container names whose contents are preserved; a
    container type may hold several names separated by '.'. Contents of
    kept containers are unindented, contents of all other containers are
    deleted. Returns a ``(blocks, pruned)`` tuple where ``pruned`` lists
    the container types whose contents were removed.
    """
    pruned = []
    pos = 0
    while pos + 1 < len(blocks):
        # Searching for a block that looks like this:
        #
        # +-------+---------------------------+
        # | ".. container ::" type            |
        # +---+                               |
        #     | blocks                        |
        #     +-------------------------------+
        head = blocks[pos]
        if not (head['type'] == 'paragraph' and
                head['lines'][0].startswith('.. container::')):
            pos += 1
            continue

        indent = head['indent']
        adjustment = blocks[pos + 1]['indent'] - indent
        containertype = head['lines'][0][15:]
        names = containertype.split('.')
        prune = not any(c in names for c in keep)
        if prune:
            pruned.append(containertype)

        # Always delete "..container:: type" block
        del blocks[pos]
        nxt = pos
        while nxt < len(blocks) and blocks[nxt]['indent'] > indent:
            if prune:
                del blocks[nxt]
            else:
                blocks[nxt]['indent'] -= adjustment
                nxt += 1
        # pos now refers to the block that followed the directive, which
        # must be examined as well, so it is not advanced here
    return blocks, pruned
288 289
# Section underline: a line made of one punctuation character from the set
# below, repeated at least twice.
_sectionre = re.compile(br"""^([-=`:.'"~^_*+#])\1+$""")
290 291
def findtables(blocks):
    '''Find simple tables

    Only simple one-line table elements are supported.

    A paragraph with more than two lines whose first line is a table
    divider and equal to its last line becomes a block of type 'table'.
    Its rows are stored in 'table' as lists of stripped cell strings and
    'header' records whether a divider line occurs between the rows.
    '''

    for block in blocks:
        # Searching for a block that looks like this:
        #
        # === ==== ===
        # A B C
        # === ==== === <- optional
        # 1 2 3
        # x y z
        # === ==== ===
        if block['type'] != 'paragraph':
            continue
        lines = block['lines']
        if (len(lines) <= 2 or
            not _tablere.match(lines[0]) or
            lines[0] != lines[-1]):
            continue

        block['type'] = 'table'
        block['header'] = False
        div = lines[0]

        # column markers are ASCII so we can calculate column
        # position in bytes
        columns = [x for x, c in enumerate(div)
                   if c == '=' and (x == 0 or div[x - 1] == ' ')]
        lastcolumn = len(columns) - 1
        rows = []
        for line in lines[1:-1]:
            if line == div:
                block['header'] = True
                continue
            cells = []
            # we measure columns not in bytes or characters but in
            # colwidth which makes things tricky
            pos = columns[0] # leading whitespace is bytes
            for n, start in enumerate(columns):
                if n < lastcolumn:
                    width = columns[n + 1] - start
                    cell = encoding.getcols(line, pos, width) # gather columns
                    pos += len(cell) # calculate byte position of end
                    cells.append(cell.strip())
                else:
                    # the last column takes the rest of the line
                    cells.append(line[pos:].strip())
            rows.append(cells)

        block['table'] = rows

    return blocks
340 341
def findsections(blocks):
    """Mark section titles.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A two-line paragraph whose second line is an underline as wide as
    the first line becomes a 'section' block: the underline character is
    stored in 'underline' and the underline line is removed.
    """
    for block in blocks:
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | Section title                |
        # | -------------                |
        # +------------------------------+
        if block['type'] != 'paragraph':
            continue
        lines = block['lines']
        if len(lines) != 2:
            continue
        title, underline = lines
        if (encoding.colwidth(title) == len(underline) and
            _sectionre.match(underline)):
            block['underline'] = underline[0]
            block['type'] = 'section'
            del lines[1]
    return blocks
362 363
def inlineliterals(blocks):
    """Replace inline literal markers (two backquotes) with a double quote.

    Only the lines of 'paragraph' and 'section' blocks are rewritten.
    """
    substs = [('``', '"')]
    for block in blocks:
        if block['type'] not in ('paragraph', 'section'):
            continue
        block['lines'] = [replace(line, substs) for line in block['lines']]
    return blocks
369 370
def hgrole(blocks):
    """Rewrite the :hg: role in 'paragraph' and 'section' blocks."""
    substs = [(':hg:`', "'hg "), ('`', "'")]
    for block in blocks:
        if block['type'] not in ('paragraph', 'section'):
            continue
        # Turn :hg:`command` into "hg command". This also works
        # when there is a line break in the command and relies on
        # the fact that we have no stray back-quotes in the input
        # (run the blocks through inlineliterals first).
        block['lines'] = [replace(line, substs) for line in block['lines']]
    return blocks
380 381
def addmargins(blocks):
    """Adds empty blocks for vertical spacing.

    Consecutive blocks of the same type are kept together when that type
    is bullet, option or field, and no margin is added after a block
    without lines. An empty 'margin' block is inserted between all other
    pairs of blocks. The list is modified in place and returned.
    """
    grouped = ('bullet', 'option', 'field')
    pos = 1
    while pos < len(blocks):
        previous = blocks[pos - 1]
        current = blocks[pos]
        samelist = (current['type'] == previous['type'] and
                    current['type'] in grouped)
        # no lines in previous block, do not separate
        if samelist or not previous['lines']:
            pos += 1
            continue
        blocks.insert(pos, {'lines': [''], 'indent': 0, 'type': 'margin'})
        pos += 2
    return blocks
399 400
def prunecomments(blocks):
    """Remove comments.

    A paragraph whose first line starts with '.. ', or which consists of
    the single line '..', is deleted together with a 'margin' block
    directly following it. The list is modified in place and returned.
    """
    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        iscomment = block['type'] == 'paragraph' and (
            block['lines'][0].startswith('.. ') or block['lines'] == ['..'])
        if not iscomment:
            pos += 1
            continue
        del blocks[pos]
        if pos < len(blocks) and blocks[pos]['type'] == 'margin':
            del blocks[pos]
    return blocks
413 414
414 415
# Directive names recognized as admonitions by findadmonitions() when the
# caller does not supply its own set.
_admonitions = set([
    'admonition',
    'attention',
    'caution',
    'danger',
    'error',
    'hint',
    'important',
    'note',
    'tip',
    'warning',
])
427 428
def findadmonitions(blocks, admonitions=None):
    """
    Makes the type of the block an admonition block if
    the first line is an admonition directive

    ``admonitions`` is the collection of directive names to recognize;
    the module default is used when it is empty or None. For a matching
    block the lower-cased directive name is stored in 'admonitiontitle',
    any text after the directive becomes the first line of the block,
    and the directive line itself is removed.
    """
    admonitions = admonitions or _admonitions

    alternatives = '|'.join(sorted(admonitions))
    admonitionre = re.compile(br'\.\. (%s)::' % alternatives,
                              flags=re.IGNORECASE)

    for block in blocks:
        lines = block['lines']
        m = admonitionre.match(lines[0])
        if not m:
            continue
        directive = lines[0]
        block['type'] = 'admonition'
        # the name sits between the leading ".. " and the trailing "::"
        title = directive[3:m.end() - 2].lower()

        firstline = directive[m.end() + 1:]
        if firstline:
            lines.insert(1, ' ' + firstline)

        block['admonitiontitle'] = title
        del lines[0]
    return blocks
453 454
# Translated titles used when rendering admonition blocks, keyed by the
# lower-cased directive name.
# NOTE(review): there is no entry for 'admonition' although that name is
# listed in _admonitions, so looking it up raises KeyError -- confirm
# whether that directive is expected to be rendered.
_admonitiontitles = {'attention': _('Attention:'),
                     'caution': _('Caution:'),
                     'danger': _('!Danger!') ,
                     'error': _('Error:'),
                     'hint': _('Hint:'),
                     'important': _('Important:'),
                     'note': _('Note:'),
                     'tip': _('Tip:'),
                     'warning': _('Warning!')}
463 464
def formatoption(block, width):
    """Format an option list item.

    The option string is padded to the block's 'optstrwidth' and used as
    the initial indent; the description is wrapped to ``width - 1``
    columns with a matching hanging indent.
    """
    desc = ' '.join(map(str.strip, block['lines']))
    optstr = block['optstr']
    padding = ' ' * (block['optstrwidth'] - encoding.colwidth(optstr))
    initindent = '%s%s ' % (optstr, padding)
    hangindent = ' ' * (encoding.colwidth(initindent) + 1)
    wrapped = util.wrap(desc, width - 1,
                        initindent=initindent,
                        hangindent=hangindent)
    return ' %s\n' % wrapped
474 475
def formatblock(block, width):
    """Format a block according to width.

    ``block`` is a block dictionary with at least 'type', 'indent' and
    'lines'; some types need further fields ('admonitiontitle',
    'underline', 'table' and 'header', 'key', 'optstr' and
    'optstrwidth'). A ``width`` of zero or less is replaced by 78.

    Returns the formatted text, ending with a newline.

    Note that 'bullet' and 'field' blocks are modified: their first line
    is rewritten in place before wrapping.
    """
    if width <= 0:
        width = 78
    indent = ' ' * block['indent']
    if block['type'] == 'admonition':
        # raises KeyError for a title without entry in _admonitiontitles
        admonition = _admonitiontitles[block['admonitiontitle']]
        if not block['lines']:
            return indent + admonition + '\n'
        # body is indented like the last line of the admonition
        hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())

        defindent = indent + hang * ' '
        text = ' '.join(map(str.strip, block['lines']))
        return '%s\n%s\n' % (indent + admonition,
                             util.wrap(text, width=width,
                                       initindent=defindent,
                                       hangindent=defindent))
    if block['type'] == 'margin':
        return '\n'
    if block['type'] == 'literal':
        # literal blocks are emitted unwrapped, with extra indentation
        indent += ' '
        return indent + ('\n' + indent).join(block['lines']) + '\n'
    if block['type'] == 'section':
        # regenerate the underline to match the displayed title width
        underline = encoding.colwidth(block['lines'][0]) * block['underline']
        return "%s%s\n%s%s\n" % (indent, block['lines'][0],indent, underline)
    if block['type'] == 'table':
        table = block['table']
        # compute column widths
        widths = [max([encoding.colwidth(e) for e in c]) for c in zip(*table)]
        text = ''
        # total width: all columns plus one separator between each pair
        span = sum(widths) + len(widths) - 1
        indent = ' ' * block['indent']
        # wrapped text continues under the last column
        hang = ' ' * (len(indent) + span - widths[-1])

        for row in table:
            l = []
            for w, v in zip(widths, row):
                pad = ' ' * (w - encoding.colwidth(v))
                l.append(v + pad)
            l = ' '.join(l)
            l = util.wrap(l, width=width, initindent=indent, hangindent=hang)
            if not text and block['header']:
                # first row of a table with header: add a rule below it
                text = l + '\n' + indent + '-' * (min(width, span)) + '\n'
            else:
                text += l + "\n"
        return text
    if block['type'] == 'definition':
        # first line is the term, the remaining lines are the definition
        term = indent + block['lines'][0]
        hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())
        defindent = indent + hang * ' '
        text = ' '.join(map(str.strip, block['lines'][1:]))
        return '%s\n%s\n' % (term, util.wrap(text, width=width,
                                             initindent=defindent,
                                             hangindent=defindent))
    subindent = indent
    if block['type'] == 'bullet':
        if block['lines'][0].startswith('| '):
            # Remove bullet for line blocks and add no extra
            # indentation.
            block['lines'][0] = block['lines'][0][2:]
        else:
            # continuation lines align with the text after the bullet
            m = _bulletre.match(block['lines'][0])
            subindent = indent + m.end() * ' '
    elif block['type'] == 'field':
        key = block['key']
        subindent = indent + _fieldwidth * ' '
        if len(key) + 2 > _fieldwidth:
            # key too large, use full line width
            key = key.ljust(width)
        else:
            # key fits within field width
            key = key.ljust(_fieldwidth)
        block['lines'][0] = key + block['lines'][0]
    elif block['type'] == 'option':
        return formatoption(block, width)

    # paragraphs, bullets and fields: join the lines and rewrap them
    text = ' '.join(map(str.strip, block['lines']))
    return util.wrap(text, width=width,
                     initindent=indent,
                     hangindent=subindent) + '\n'
555 556
def formathtml(blocks):
    """Format RST blocks as HTML

    ``blocks`` is a list of block dictionaries with 'type', 'indent' and
    'lines' fields, plus the type-specific fields 'admonitiontitle',
    'underline', 'table', 'key' and 'optstr'. Margin blocks are ignored.

    Returns the generated HTML as a single string. All text taken from
    the blocks is HTML-escaped.
    """

    out = []
    headernest = ''
    # stack of (tag, indent level) tuples for the lists currently open
    listnest = []
    # block types that are rendered as list items
    listtypes = ('definition', 'bullet', 'field', 'option')

    def escape(s):
        return cgi.escape(s, True)

    def openlist(start, level):
        # open a new list unless one with the same tag is the innermost
        if not listnest or listnest[-1][0] != start:
            listnest.append((start, level))
            out.append('<%s>\n' % start)

    blocks = [b for b in blocks if b['type'] != 'margin']

    for pos, b in enumerate(blocks):
        btype = b['type']
        level = b['indent']
        lines = b['lines']

        if btype == 'admonition':
            admonition = escape(_admonitiontitles[b['admonitiontitle']])
            text = escape(' '.join(map(str.strip, lines)))
            out.append('<p>\n<b>%s</b> %s\n</p>\n' % (admonition, text))
        elif btype == 'paragraph':
            out.append('<p>\n%s\n</p>\n' % escape('\n'.join(lines)))
        elif btype == 'margin':
            pass
        elif btype == 'literal':
            out.append('<pre>\n%s\n</pre>\n' % escape('\n'.join(lines)))
        elif btype == 'section':
            # heading level is the order in which underline characters
            # are first seen
            i = b['underline']
            if i not in headernest:
                headernest += i
            level = headernest.index(i) + 1
            out.append('<h%d>%s</h%d>\n' % (level, escape(lines[0]), level))
        elif btype == 'table':
            table = b['table']
            out.append('<table>\n')
            for row in table:
                out.append('<tr>')
                for v in row:
                    out.append('<td>')
                    out.append(escape(v))
                    out.append('</td>')
                    out.append('\n')
                # drop the newline after the last cell of the row
                out.pop()
                out.append('</tr>\n')
            out.append('</table>\n')
        elif btype == 'definition':
            openlist('dl', level)
            term = escape(lines[0])
            text = escape(' '.join(map(str.strip, lines[1:])))
            out.append(' <dt>%s\n <dd>%s\n' % (term, text))
        elif btype == 'bullet':
            bullet, head = lines[0].split(' ', 1)
            if bullet in ('*', '-'):
                openlist('ul', level)
            else:
                openlist('ol', level)
            out.append(' <li> %s\n' % escape(' '.join([head] + lines[1:])))
        elif btype == 'field':
            openlist('dl', level)
            key = escape(b['key'])
            text = escape(' '.join(map(str.strip, lines)))
            out.append(' <dt>%s\n <dd>%s\n' % (key, text))
        elif btype == 'option':
            openlist('dl', level)
            opt = escape(b['optstr'])
            desc = escape(' '.join(map(str.strip, lines)))
            out.append(' <dt>%s\n <dd>%s\n' % (opt, desc))

        # Close every open list the next block does not belong to. All
        # affected lists are closed, innermost first, so that no outer
        # list is left open when nested lists end together or when the
        # document ends.
        if pos == len(blocks) - 1:
            while listnest:
                start, level = listnest.pop()
                out.append('</%s>\n' % start)
        else:
            nb = blocks[pos + 1]
            ni = nb['indent']
            while listnest:
                start, level = listnest[-1]
                if (ni < level or
                    (ni == level and nb['type'] not in listtypes)):
                    out.append('</%s>\n' % start)
                    listnest.pop()
                else:
                    break

    return ''.join(out)
646 647
def parse(text, indent=0, keep=None, admonitions=None):
    """Parse text into a list of blocks

    ``indent`` is added to the indentation of every block, ``keep`` lists
    the container types to preserve and ``admonitions`` optionally
    overrides the recognized admonition directives.

    Returns a ``(blocks, pruned)`` tuple where ``pruned`` lists the
    container types that were removed.
    """
    blocks = findblocks(text)
    for block in blocks:
        block['indent'] += indent

    blocks = findtables(findliteralblocks(blocks))
    blocks, pruned = prunecontainers(blocks, keep or [])

    # the order of these passes matters
    for transform in (findsections, inlineliterals, hgrole,
                      splitparagraphs, updatefieldlists,
                      updateoptionlists):
        blocks = transform(blocks)

    blocks = findadmonitions(blocks, admonitions=admonitions)
    blocks = prunecomments(addmargins(blocks))
    return blocks, pruned
666 667
def formatblocks(blocks, width):
    """Format every block with formatblock() and concatenate the results."""
    return ''.join([formatblock(block, width) for block in blocks])
670 671
def format(text, width=80, indent=0, keep=None, style='plain', section=None):
    """Parse and format the text according to width.

    ``indent`` and ``keep`` are passed on to parse(). With ``style`` set
    to 'html' the blocks are rendered by formathtml(), otherwise each
    block is rendered by formatblock() using ``width``. If ``section`` is
    given, only the sections with that name (as computed by
    getsections()) and their nested sections are rendered.

    Returns the formatted text if ``keep`` is None, otherwise a
    ``(text, pruned)`` tuple with the list of pruned container types.
    """
    blocks, pruned = parse(text, indent, keep or [])
    parents = []
    if section:
        sections = getsections(blocks)
        blocks = []
        i = 0
        lastparents = []
        synthetic = []
        collapse = True
        while i < len(sections):
            name, nest, b = sections[i]
            # parents holds the indices of the enclosing sections,
            # ending with the current one
            del parents[nest:]
            parents.append(i)
            if name == section:
                if lastparents != parents:
                    llen = len(lastparents)
                    plen = len(parents)
                    # matches at different nesting depths are not
                    # collapsed below
                    if llen and llen != plen:
                        collapse = False
                    s = []
                    # add the first and last block of each ancestor (from
                    # index 3 on) that differs from the previous match;
                    # s records where they were inserted
                    for j in xrange(3, plen - 1):
                        parent = parents[j]
                        if (j >= llen or
                            lastparents[j] != parent):
                            s.append(len(blocks))
                            sec = sections[parent][2]
                            blocks.append(sec[0])
                            blocks.append(sec[-1])
                    if s:
                        synthetic.append(s)

                lastparents = parents[:]
                blocks.extend(b)

                ## Also show all subnested sections
                while i + 1 < len(sections) and sections[i + 1][1] > nest:
                    i += 1
                    blocks.extend(sections[i][2])
            i += 1
        if collapse:
            # process from the end so earlier indices stay valid while
            # blocks are deleted
            synthetic.reverse()
            for s in synthetic:
                # replace the inserted ancestor blocks by a single quoted,
                # dot-separated path in the first line of the real block
                path = [blocks[syn]['lines'][0] for syn in s]
                real = s[-1] + 2
                realline = blocks[real]['lines']
                realline[0] = ('"%s"' %
                               '.'.join(path + [realline[0]]).replace('"', ''))
                del blocks[s[0]:real]

    if style == 'html':
        text = formathtml(blocks)
    else:
        text = ''.join(formatblock(b, width) for b in blocks)
    if keep is None:
        return text
    else:
        return text, pruned
730 731
def getsections(blocks):
    '''return a list of (section name, nesting level, blocks) tuples

    Section blocks, definition list items and field list items each
    start a new named entry. Other blocks are appended to the last
    entry, or to a new unnamed entry when their indentation shows that
    they belong to an earlier entry.
    '''
    # underline characters (and ' ' for definitions/fields) in the order
    # of their nesting; the position of a character gives its level
    nest = ""
    level = 0
    secs = []

    def getname(b):
        # name of an entry: the field key or the first line, lower-cased,
        # without surrounding double quotes and cut at the first '('
        if b['type'] == 'field':
            x = b['key']
        else:
            x = b['lines'][0]
        x = encoding.lower(x).strip('"')
        if '(' in x:
            x = x.split('(')[0]
        return x

    for b in blocks:
        if b['type'] == 'section':
            i = b['underline']
            if i not in nest:
                nest += i
            level = nest.index(i) + 1
            # forget the underline characters of deeper levels
            nest = nest[:level]
            secs.append((getname(b), level, [b]))
        elif b['type'] in ('definition', 'field'):
            i = ' '
            if i not in nest:
                nest += i
            level = nest.index(i) + 1
            nest = nest[:level]
            # walk back through the entries collected so far and derive
            # the level from the indentation of the closest definition
            for i in range(1, len(secs) + 1):
                sec = secs[-i]
                if sec[1] < level:
                    break
                siblings = [a for a in sec[2] if a['type'] == 'definition']
                if siblings:
                    siblingindent = siblings[-1]['indent']
                    indent = b['indent']
                    if siblingindent < indent:
                        # deeper than that definition: nest below it
                        level += 1
                        break
                    elif siblingindent == indent:
                        # same indentation: same level as that entry
                        level = sec[1]
                        break
            secs.append((getname(b), level, [b]))
        else:
            if not secs:
                # add an initial empty section
                secs = [('', 0, [])]
            if b['type'] != 'margin':
                # walk back to the closest entry whose first block is not
                # indented deeper than this block
                pointer = 1
                bindent = b['indent']
                while pointer < len(secs):
                    section = secs[-pointer][2][0]
                    if section['type'] != 'margin':
                        sindent = section['indent']
                        if len(section['lines']) > 1:
                            # take the indentation of the entry's second
                            # line into account
                            sindent += len(section['lines'][1]) - \
                              len(section['lines'][1].lstrip(' '))
                        if bindent >= sindent:
                            break
                    pointer += 1
                if pointer > 1:
                    # the block belongs to an earlier entry: start a new
                    # unnamed entry based on that entry's level
                    blevel = secs[-pointer][1]
                    if section['type'] != b['type']:
                        blevel += 1
                    secs.append(('', blevel, []))
            secs[-1][2].append(b)
    return secs
800 801
def decorateblocks(blocks, width):
    '''generate a list of (section name, line text) pairs for search

    The result holds one list of pairs per section returned by
    getsections(); the line texts keep their line endings.
    '''
    return [[(name, line)
             for line in formatblocks(secblocks, width).splitlines(True)]
            for name, _level, secblocks in getsections(blocks)]
809 810
def maketable(data, indent=0, header=False):
    '''Generate an RST table for the given table data as a list of lines

    ``data`` is a sequence of rows of cell strings. Every returned line
    ends with a newline and is indented by ``indent`` spaces. When
    ``header`` is true and there is more than one row, a divider line is
    added after the first row.
    '''

    widths = [max(encoding.colwidth(e) for e in c) for c in zip(*data)]
    margin = ' ' * indent
    div = margin + ' '.join('=' * w for w in widths) + '\n'

    def cell(value, width):
        if '\n' in value:
            # only remove line breaks and indentation, long lines are
            # handled by the next tool
            value = ' '.join(part.lstrip() for part in value.split('\n'))
        return value + ' ' * (width - encoding.colwidth(value))

    out = [div]
    for row in data:
        cells = [cell(v, w) for w, v in zip(widths, row)]
        out.append(margin + ' '.join(cells) + "\n")
    if header and len(data) > 1:
        out.insert(2, div)
    out.append(div)
    return out
General Comments 0
You need to be logged in to leave comments. Login now