##// END OF EJS Templates
minirst: optimize HTML table generation a bit...
Dan Villiom Podlaski Christiansen -
r18752:fabbaa25 default
parent child Browse files
Show More
@@ -1,691 +1,695 b''
1 # minirst.py - minimal reStructuredText parser
1 # minirst.py - minimal reStructuredText parser
2 #
2 #
3 # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """simplified reStructuredText parser.
8 """simplified reStructuredText parser.
9
9
10 This parser knows just enough about reStructuredText to parse the
10 This parser knows just enough about reStructuredText to parse the
11 Mercurial docstrings.
11 Mercurial docstrings.
12
12
13 It cheats in a major way: nested blocks are not really nested. They
13 It cheats in a major way: nested blocks are not really nested. They
14 are just indented blocks that look like they are nested. This relies
14 are just indented blocks that look like they are nested. This relies
15 on the user to keep the right indentation for the blocks.
15 on the user to keep the right indentation for the blocks.
16
16
17 Remember to update http://mercurial.selenic.com/wiki/HelpStyleGuide
17 Remember to update http://mercurial.selenic.com/wiki/HelpStyleGuide
18 when adding support for new constructs.
18 when adding support for new constructs.
19 """
19 """
20
20
21 import re
21 import re
22 import util, encoding
22 import util, encoding
23 from i18n import _
23 from i18n import _
24
24
25 import cgi
25 import cgi
26
26
def section(s):
    """Return title s as reST text underlined with double quotes.

    The underline is as wide as the displayed title (encoding.colwidth)
    and is followed by a blank line.
    """
    underline = "\"" * encoding.colwidth(s)
    return "%s\n%s\n\n" % (s, underline)
29
29
def subsection(s):
    """Return title s as reST text underlined with '=' characters.

    The underline is as wide as the displayed title (encoding.colwidth)
    and is followed by a blank line.
    """
    underline = '=' * encoding.colwidth(s)
    return "%s\n%s\n\n" % (s, underline)
32
32
def subsubsection(s):
    """Return title s as reST text underlined with '-' characters.

    The underline is as wide as the displayed title (encoding.colwidth)
    and is followed by a blank line.
    """
    underline = "-" * encoding.colwidth(s)
    return "%s\n%s\n\n" % (s, underline)
35
35
def subsubsubsection(s):
    """Return title s as reST text underlined with '.' characters.

    The underline is as wide as the displayed title (encoding.colwidth)
    and is followed by a blank line.
    """
    underline = "." * encoding.colwidth(s)
    return "%s\n%s\n\n" % (s, underline)
38
38
def replace(text, substs):
    '''
    Apply a list of (find, replace) pairs to a text.

    The pairs are applied in order, each one to the result of the
    previous replacement. The text is given and returned in the local
    encoding (encoding.encoding).

    >>> replace("foo bar", [('f', 'F'), ('b', 'B')])
    'Foo Bar'
    >>> encoding.encoding = 'latin1'
    >>> replace('\\x81\\\\', [('\\\\', '/')])
    '\\x81/'
    >>> encoding.encoding = 'shiftjis'
    >>> replace('\\x81\\\\', [('\\\\', '/')])
    '\\x81\\\\'
    '''

    # Replacements are done on the decoded text: some character
    # encodings (cp932 for Japanese, at least) use ASCII characters
    # other than control/alphabet/digit as part of multi-byte
    # characters, so replacing such characters directly in a string in
    # local encoding would produce invalid byte sequences.
    charset = encoding.encoding
    decoded = text.decode(charset)
    for old, new in substs:
        decoded = decoded.replace(old, new)
    return decoded.encode(charset)
61
61
# Separator between blocks: a newline followed by one or more lines that
# contain only whitespace. Used by findblocks() to split the input text.
_blockre = re.compile(r"\n(?:\s*\n)+")
63
63
def findblocks(text):
    """Split text into blocks of lines separated by blank lines.

    Leading newlines and trailing whitespace of the text are ignored.

    Returns a list of dictionaries, one per block. Each has an 'indent'
    field (the smallest leading whitespace found on its lines) and a
    'lines' field (the lines with that common indentation removed).
    """
    found = []
    body = text.lstrip('\n').rstrip()
    for chunk in _blockre.split(body):
        chunklines = chunk.splitlines()
        if not chunklines:
            continue
        margin = min(len(l) - len(l.lstrip()) for l in chunklines)
        found.append({'indent': margin,
                      'lines': [l[margin:] for l in chunklines]})
    return found
78
78
def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'. The list is modified in place (a
    paragraph consisting only of "::" is removed, and the "::" marker is
    stripped from other paragraphs) and is also returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):
            indent = blocks[i]['indent']
            # How far the first following block is indented relative to
            # the paragraph; subtracted from every literal block below.
            adjustment = blocks[i + 1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block
                del blocks[i]
                i -= 1
                # NOTE(review): from here on blocks[i] is the block that
                # preceded the "::" paragraph. If the "::" paragraph was
                # the first block, i is now -1 and blocks[i] is the last
                # block of the list -- confirm this is intended.
            elif blocks[i]['lines'][-1].endswith(' ::'):
                # Partially minimized form: remove space and both
                # colons.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
            else:
                # Fully minimized form: remove just one colon.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            m = _bulletre.match(blocks[i]['lines'][0])
            if m:
                indent += m.end()
                adjustment -= m.end()

            # Mark the following indented blocks.
            while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                blocks[i + 1]['type'] = 'literal'
                blocks[i + 1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks
129
129
# Start of a bullet list item: "-", an alphanumeric label followed by
# ".", an alphanumeric label followed by ")" (optionally opened by "("),
# or "|" -- in every case followed by a space.
_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')
# Option list item: an optional short option ("-x, "), a long option
# ("--name"), then the rest of the line. updateoptionlists() reads
# group 2 as the short option letter, group 3 as the long option,
# group 5 as the option argument and group 6 as the description.
_optionre = re.compile(r'^(-([a-zA-Z0-9]), )?(--[a-z0-9-]+)'
                       r'((.*) +)(.*)$')
# Field list item ":key: value". The key may not start with ":" or a
# space, may not contain ":" and may not end with a space.
_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')
# Definition list term: any line whose first character is not a space.
_definitionre = re.compile(r'[^ ]')
# Table divider line: runs of "=" separated by whitespace.
_tablere = re.compile(r'(=+\s+)*=+')
136
136
def splitparagraphs(blocks):
    """Split paragraphs that are really lists into one block per item.

    A paragraph whose first line starts a bullet, option, field or
    definition list item is replaced, in place, by one block per list
    item. The new blocks get the list type as 'type' and inherit the
    paragraph's 'indent'. The (modified) list is returned.
    """
    # Tuples with (list type, item regexp, single line items?). Order
    # matters: definition lists have the least specific regexp and must
    # come last.
    listtypes = [('bullet', _bulletre, True),
                 ('option', _optionre, True),
                 ('field', _fieldre, True),
                 ('definition', _definitionre, False)]

    def match(lines, i, itemre, singleline):
        """Does itemre match an item at line i?

        A list item can be followed by an indented line or another list
        item (but only if singleline is True).
        """
        if not itemre.match(lines[i]):
            return False
        if i + 1 < len(lines):
            following = lines[i + 1]
        else:
            following = ''
        if not singleline:
            return following.startswith(' ')
        return (following == '' or following[0] == ' '
                or itemre.match(following))

    pos = 0
    while pos < len(blocks):
        current = blocks[pos]
        if current['type'] == 'paragraph':
            lines = current['lines']
            for kind, itemre, singleline in listtypes:
                if not match(lines, 0, itemre, singleline):
                    continue
                # Start a new item at every line that opens one; all
                # other lines continue the most recent item.
                items = []
                for n, line in enumerate(lines):
                    if match(lines, n, itemre, singleline):
                        items.append({'type': kind, 'lines': [],
                                      'indent': current['indent']})
                    items[-1]['lines'].append(line)
                blocks[pos:pos + 1] = items
                break
        pos += 1
    return blocks
178
178
# Number of columns reserved for the key of a field list item;
# formatblock() pads short keys to this width and uses it as the hanging
# indent of the field body.
_fieldwidth = 14
180
180
def updatefieldlists(blocks):
    """Find key for field lists.

    For every block of type 'field', the ":key:" prefix is removed from
    its first line and stored in the block's 'key' field. The blocks are
    modified in place and the list is returned.
    """
    for block in blocks:
        if block['type'] != 'field':
            continue
        m = _fieldre.match(block['lines'][0])
        key, rest = m.groups()
        block['lines'][0] = rest
        block['key'] = key
    return blocks
200
200
def updateoptionlists(blocks):
    """Split option list items into option string and description.

    For every block of type 'option', the first line is reduced to the
    description and the normalized option string is stored in 'optstr'.
    Every block in a run of consecutive option blocks also gets
    'optstrwidth', the display width of the widest option string in that
    run. The blocks are modified in place and the list is returned.
    """
    start = 0
    while start < len(blocks):
        if blocks[start]['type'] != 'option':
            start += 1
            continue

        # Walk the run of consecutive option blocks beginning at start.
        widest = 0
        end = start
        while end < len(blocks) and blocks[end]['type'] == 'option':
            block = blocks[end]
            m = _optionre.match(block['lines'][0])

            shortoption = m.group(2)
            longoption = m.group(3)[2:].strip()
            longoptionarg = m.group(5).strip()
            block['lines'][0] = m.group(6).strip()

            if shortoption:
                prefix = "-%s " % shortoption
                noshortop = ''
            else:
                # no short option: pad so the long options line up
                prefix = ''
                noshortop = ' '

            opt = prefix + "%s--%s %s" % (noshortop, longoption,
                                          longoptionarg)
            opt = opt.rstrip()
            block['optstr'] = opt
            widest = max(widest, encoding.colwidth(opt))
            end += 1

        for block in blocks[start:end]:
            block['optstrwidth'] = widest
        # blocks[end] (if any) is not an option block, so skip past it
        start = end + 1
    return blocks
236
236
def prunecontainers(blocks, keep):
    """Prune unwanted containers.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    Every ".. container:: type" paragraph is removed. The more deeply
    indented blocks that follow it are removed too when the container
    type is not in keep; otherwise they are dedented. Returns a tuple
    (blocks, pruned) where blocks is the list modified in place and
    pruned lists the container types that were dropped.
    """
    pruned = []
    pos = 0
    while pos + 1 < len(blocks):
        # Searching for a block that looks like this:
        #
        # +-------+---------------------------+
        # | ".. container ::" type            |
        # +---+                               |
        #     | blocks                        |
        #     +-------------------------------+
        head = blocks[pos]
        if (head['type'] != 'paragraph' or
            not head['lines'][0].startswith('.. container::')):
            pos += 1
            continue

        base = head['indent']
        shift = blocks[pos + 1]['indent'] - base
        containertype = head['lines'][0][15:]
        dropping = containertype not in keep
        if dropping:
            pruned.append(containertype)

        # Always delete "..container:: type" block; pos then refers to
        # the first block of the container body and is examined again on
        # the next iteration of the outer loop.
        del blocks[pos]
        inner = pos
        while inner < len(blocks) and blocks[inner]['indent'] > base:
            if dropping:
                del blocks[inner]
            else:
                blocks[inner]['indent'] -= shift
                inner += 1
    return blocks, pruned
274
274
# Section underline: a line made up entirely of one of the listed
# punctuation characters, repeated at least twice.
_sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")
276
276
def findtables(blocks):
    '''Find simple tables

    Only simple one-line table elements are supported.

    A paragraph that starts and ends with the same divider line becomes
    a block of type 'table'. Its 'table' field holds the rows as lists
    of stripped cell strings, and 'header' is True when a divider line
    also occurs between the rows. The list is returned.
    '''

    for block in blocks:
        # Searching for a block that looks like this:
        #
        # === ==== ===
        #  A    B   C
        # === ==== ===  <- optional
        #  1    2   3
        #  x    y   z
        # === ==== ===
        if block['type'] != 'paragraph':
            continue
        lines = block['lines']
        if (len(lines) <= 2 or not _tablere.match(lines[0])
            or lines[0] != lines[-1]):
            continue

        div = lines[0]
        block['type'] = 'table'
        block['header'] = False

        # column markers are ASCII so we can calculate column
        # position in bytes
        columns = [x for x, ch in enumerate(div)
                   if ch == '=' and (x == 0 or div[x - 1] == ' ')]
        lastcol = len(columns) - 1
        rows = []
        for l in lines[1:-1]:
            if l == div:
                block['header'] = True
                continue
            row = []
            # we measure columns not in bytes or characters but in
            # colwidth which makes things tricky
            pos = columns[0] # leading whitespace is bytes
            for n, start in enumerate(columns):
                if n == lastcol:
                    # the last cell takes the rest of the line
                    row.append(l[pos:].strip())
                    continue
                width = columns[n + 1] - start
                v = encoding.getcols(l, pos, width) # gather columns
                pos += len(v) # calculate byte position of end
                row.append(v.strip())
            rows.append(row)

        block['table'] = rows

    return blocks
326
326
def findsections(blocks):
    """Finds sections.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A two-line paragraph whose second line is an underline exactly as
    wide as the displayed title becomes a block of type 'section'. The
    underline character is stored in 'underline' and the underline line
    itself is dropped. The list is returned.
    """
    for block in blocks:
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | Section title                |
        # | -------------                |
        # +------------------------------+
        if block['type'] != 'paragraph':
            continue
        lines = block['lines']
        if len(lines) != 2:
            continue
        title, underline = lines
        if (encoding.colwidth(title) == len(underline) and
            _sectionre.match(underline)):
            block['underline'] = underline[0]
            block['type'] = 'section'
            del lines[1]
    return blocks
348
348
def inlineliterals(blocks):
    """Turn inline literal markers into plain double quotes.

    Only the lines of 'paragraph' and 'section' blocks are rewritten.
    The list is returned.
    """
    substs = [('``', '"')]
    for b in blocks:
        if b['type'] not in ('paragraph', 'section'):
            continue
        b['lines'] = [replace(line, substs) for line in b['lines']]
    return blocks
355
355
def hgrole(blocks):
    """Rewrite the :hg: role in paragraph and section blocks.

    The list is returned.
    """
    substs = [(':hg:`', '"hg '), ('`', '"')]
    for b in blocks:
        if b['type'] not in ('paragraph', 'section'):
            continue
        # Turn :hg:`command` into "hg command". This also works
        # when there is a line break in the command and relies on
        # the fact that we have no stray back-quotes in the input
        # (run the blocks through inlineliterals first).
        b['lines'] = [replace(line, substs) for line in b['lines']]
    return blocks
366
366
def addmargins(blocks):
    """Adds empty blocks for vertical spacing.

    Consecutive bullet, option, or field blocks of the same type are
    kept together with no vertical space between them; a block of type
    'margin' is inserted between all other neighbouring blocks. The
    list is modified in place and returned.
    """
    grouped = ('bullet', 'option', 'field')
    pos = 1
    while pos < len(blocks):
        kind = blocks[pos]['type']
        if kind in grouped and kind == blocks[pos - 1]['type']:
            pos += 1
            continue
        blocks.insert(pos, {'lines': [''], 'indent': 0, 'type': 'margin'})
        # step over the margin and the block that now follows it
        pos += 2
    return blocks
382
382
def prunecomments(blocks):
    """Remove comments.

    A paragraph is a comment when its first line starts with ".. " or
    when it consists of the single line "..". A margin block directly
    following a removed comment is removed as well. The list is
    modified in place and returned.
    """
    pos = 0
    while pos < len(blocks):
        candidate = blocks[pos]
        iscomment = (candidate['type'] == 'paragraph' and
                     (candidate['lines'][0].startswith('.. ') or
                      candidate['lines'] == ['..']))
        if not iscomment:
            pos += 1
            continue
        del blocks[pos]
        if pos < len(blocks) and blocks[pos]['type'] == 'margin':
            del blocks[pos]
    return blocks
396
396
# Start of an admonition directive such as ".. note::"; the directive
# name is matched case-insensitively.
_admonitionre = re.compile(r"\.\. (admonition|attention|caution|danger|"
                           r"error|hint|important|note|tip|warning)::",
                           flags=re.IGNORECASE)
400
400
def findadmonitions(blocks):
    """
    Makes the type of the block an admonition block if
    the first line is an admonition directive.

    The lower-cased directive name is stored in the block's
    'admonitiontitle' field and the directive line is removed; text that
    followed the directive on the same line is kept as the first line.
    The list is returned.
    """
    for block in blocks:
        directive = block['lines'][0]
        m = _admonitionre.match(directive)
        if not m:
            continue

        block['type'] = 'admonition'
        # strip the leading ".. " and the trailing "::"
        block['admonitiontitle'] = directive[3:m.end() - 2].lower()

        firstline = directive[m.end() + 1:]
        if firstline:
            block['lines'].insert(1, ' ' + firstline)
        del block['lines'][0]
    return blocks
421
421
# Title printed for each admonition, keyed by the lower-cased directive
# name stored in a block's 'admonitiontitle' field. Each title is passed
# through _() from i18n.
# NOTE(review): _admonitionre also matches the generic "admonition"
# directive, which has no entry here, so looking up its title would
# raise KeyError -- confirm whether that directive is meant to be
# supported.
_admonitiontitles = {'attention': _('Attention:'),
                     'caution': _('Caution:'),
                     'danger': _('!Danger!') ,
                     'error': _('Error:'),
                     'hint': _('Hint:'),
                     'important': _('Important:'),
                     'note': _('Note:'),
                     'tip': _('Tip:'),
                     'warning': _('Warning!')}
431
431
def formatoption(block, width):
    """Format an option list block according to width.

    The option string is padded to the block's 'optstrwidth' so that
    the descriptions of neighbouring options line up; the description
    is wrapped to width - 1 with continuation lines indented below it.
    """
    optstr = block['optstr']
    padding = ' ' * (block['optstrwidth'] - encoding.colwidth(optstr))
    initindent = '%s%s ' % (optstr, padding)
    hangindent = ' ' * (encoding.colwidth(initindent) + 1)
    desc = ' '.join(map(str.strip, block['lines']))
    wrapped = util.wrap(desc, width - 1,
                        initindent=initindent,
                        hangindent=hangindent)
    return ' %s\n' % wrapped
442
442
def formatblock(block, width):
    """Format a block according to width.

    block is a dictionary with at least 'type' and 'indent' fields plus
    the fields specific to its type ('lines', 'table', 'header', 'key',
    'underline', 'admonitiontitle', 'optstr', ...). A width of zero or
    less selects the default of 78 columns.

    Returns the formatted text, terminated by a newline (the empty
    string for a table without rows). The block is not modified.
    """
    if width <= 0:
        width = 78
    indent = ' ' * block['indent']
    btype = block['type']

    if btype == 'admonition':
        admonition = _admonitiontitles[block['admonitiontitle']]
        lines = block['lines']
        if not lines:
            # directive without any body text: print the title only
            return indent + admonition + '\n'
        hang = len(lines[-1]) - len(lines[-1].lstrip())

        defindent = indent + hang * ' '
        text = ' '.join(map(str.strip, lines))
        return '%s\n%s\n' % (indent + admonition,
                             util.wrap(text, width=width,
                                       initindent=defindent,
                                       hangindent=defindent))
    if btype == 'margin':
        return '\n'
    if btype == 'literal':
        indent += ' '
        return indent + ('\n' + indent).join(block['lines']) + '\n'
    if btype == 'section':
        title = block['lines'][0]
        underline = encoding.colwidth(title) * block['underline']
        return "%s%s\n%s%s\n" % (indent, title, indent, underline)
    if btype == 'table':
        table = block['table']
        if not table:
            # only divider lines, no rows: nothing to print
            return ''
        # compute column widths
        widths = [max([encoding.colwidth(e) for e in c]) for c in zip(*table)]
        span = sum(widths) + len(widths) - 1
        # wrapped text continues below the last column
        hang = ' ' * (len(indent) + span - widths[-1])

        rendered = []
        for rownum, row in enumerate(table):
            cells = []
            for w, v in zip(widths, row):
                pad = ' ' * (w - encoding.colwidth(v))
                cells.append(v + pad)
            l = util.wrap(' '.join(cells), width=width,
                          initindent=indent, hangindent=hang)
            rendered.append(l + '\n')
            if rownum == 0 and block['header']:
                # rule between the header row and the body
                rendered.append(indent + '-' * (min(width, span)) + '\n')
        return ''.join(rendered)
    if btype == 'definition':
        lines = block['lines']
        term = indent + lines[0]
        hang = len(lines[-1]) - len(lines[-1].lstrip())
        defindent = indent + hang * ' '
        text = ' '.join(map(str.strip, lines[1:]))
        return '%s\n%s\n' % (term, util.wrap(text, width=width,
                                             initindent=defindent,
                                             hangindent=defindent))

    subindent = indent
    # Work on a copy: the first line is rewritten below, and formatting
    # the same block twice must give the same result.
    lines = list(block['lines'])
    if btype == 'bullet':
        if lines[0].startswith('| '):
            # Remove bullet for line blocks and add no extra
            # indention.
            lines[0] = lines[0][2:]
        else:
            m = _bulletre.match(lines[0])
            subindent = indent + m.end() * ' '
    elif btype == 'field':
        key = block['key']
        subindent = indent + _fieldwidth * ' '
        if len(key) + 2 > _fieldwidth:
            # key too large, use full line width
            key = key.ljust(width)
        else:
            # key fits within field width
            key = key.ljust(_fieldwidth)
        lines[0] = key + lines[0]
    elif btype == 'option':
        return formatoption(block, width)

    text = ' '.join(map(str.strip, lines))
    return util.wrap(text, width=width,
                     initindent=indent,
                     hangindent=subindent) + '\n'
521
521
def formathtml(blocks):
    """Format RST blocks as HTML

    blocks is a list of block dicts. Every block provides 'type',
    'indent' and 'lines'; some types carry extra keys that are read
    here ('table', 'key', 'optstr', 'underline', 'admonitiontitle').
    Blocks of type 'margin' are dropped before rendering.

    Returns the generated HTML as a single string.
    """

    out = []
    headernest = ''
    listnest = []
    # block types that keep an open list going when they follow at the
    # same indent level
    listtypes = ('definition', 'bullet', 'field', 'option')

    def escape(s):
        return cgi.escape(s, True)

    def openlist(start, level):
        # only open a new list when the innermost open list is of a
        # different kind
        if not listnest or listnest[-1][0] != start:
            listnest.append((start, level))
            out.append('<%s>\n' % start)

    blocks = [b for b in blocks if b['type'] != 'margin']

    for pos, b in enumerate(blocks):
        btype = b['type']
        level = b['indent']
        lines = b['lines']

        if btype == 'admonition':
            admonition = escape(_admonitiontitles[b['admonitiontitle']])
            text = escape(' '.join(map(str.strip, lines)))
            out.append('<p>\n<b>%s</b> %s\n</p>\n' % (admonition, text))
        elif btype == 'paragraph':
            out.append('<p>\n%s\n</p>\n' % escape('\n'.join(lines)))
        elif btype == 'literal':
            out.append('<pre>\n%s\n</pre>\n' % escape('\n'.join(lines)))
        elif btype == 'section':
            # the header level is the order in which each underline
            # character was first seen
            i = b['underline']
            if i not in headernest:
                headernest += i
            level = headernest.index(i) + 1
            out.append('<h%d>%s</h%d>\n' % (level, escape(lines[0]), level))
        elif btype == 'table':
            table = b['table']
            out.append('<table>\n')
            for row in table:
                out.append('<tr>')
                for v in row:
                    out.append('<td>')
                    out.append(escape(v))
                    out.append('</td>')
                    out.append('\n')
                # drop the separator after the last cell; a row without
                # cells added none, and popping then would remove '<tr>'
                if row:
                    out.pop()
                out.append('</tr>\n')
            out.append('</table>\n')
        elif btype == 'definition':
            openlist('dl', level)
            term = escape(lines[0])
            text = escape(' '.join(map(str.strip, lines[1:])))
            out.append(' <dt>%s\n <dd>%s\n' % (term, text))
        elif btype == 'bullet':
            bullet, head = lines[0].split(' ', 1)
            if bullet == '-':
                openlist('ul', level)
            else:
                openlist('ol', level)
            out.append(' <li> %s\n' % escape(' '.join([head] + lines[1:])))
        elif btype == 'field':
            openlist('dl', level)
            key = escape(b['key'])
            text = escape(' '.join(map(str.strip, lines)))
            out.append(' <dt>%s\n <dd>%s\n' % (key, text))
        elif btype == 'option':
            openlist('dl', level)
            opt = escape(b['optstr'])
            desc = escape(' '.join(map(str.strip, lines)))
            out.append(' <dt>%s\n <dd>%s\n' % (opt, desc))

        # close lists if indent level of next block is lower
        # NOTE(review): at most one (the innermost) list is closed per
        # block, including after the last block - confirm this is enough
        # when several lists are nested
        if listnest:
            start, level = listnest[-1]
            if pos == len(blocks) - 1:
                out.append('</%s>\n' % start)
                listnest.pop()
            else:
                nb = blocks[pos + 1]
                ni = nb['indent']
                if (ni < level or
                    (ni == level and nb['type'] not in listtypes)):
                    out.append('</%s>\n' % start)
                    listnest.pop()

    return ''.join(out)
608
612
def parse(text, indent=0, keep=None):
    """Parse text into a list of blocks

    The text is split with findblocks(), every block's 'indent' is
    shifted by ``indent``, and the blocks are then run through the
    fixed sequence of passes below. Returns a ``(blocks, pruned)``
    pair, where ``pruned`` is the second value returned by
    prunecontainers(), which is given ``keep`` (or an empty list).
    """
    blocks = findblocks(text)
    for block in blocks:
        block['indent'] += indent

    # passes that run before containers are pruned
    for step in (findliteralblocks, findtables):
        blocks = step(blocks)

    blocks, pruned = prunecontainers(blocks, keep or [])

    # remaining passes, applied in this exact order
    for step in (findsections,
                 inlineliterals,
                 hgrole,
                 splitparagraphs,
                 updatefieldlists,
                 updateoptionlists,
                 addmargins,
                 prunecomments,
                 findadmonitions):
        blocks = step(blocks)

    return blocks, pruned
628
632
def formatblocks(blocks, width):
    """Format every block with formatblock() and concatenate the results"""
    return ''.join([formatblock(block, width) for block in blocks])
632
636
def format(text, width=80, indent=0, keep=None, style='plain'):
    """Parse and format the text according to width.

    With style 'html' the parsed blocks go through formathtml();
    any other style formats each block with formatblock() at the
    given width. The formatted text is returned alone when ``keep``
    is None, otherwise as a ``(text, pruned)`` pair.
    """
    blocks, pruned = parse(text, indent, keep or [])

    if style != 'html':
        rendered = ''.join([formatblock(block, width) for block in blocks])
    else:
        rendered = formathtml(blocks)

    if keep is not None:
        return rendered, pruned
    return rendered
644
648
def getsections(blocks):
    '''return a list of (section name, nesting level, blocks) tuples

    A 'section' block starts a new entry whose name is its first line
    and whose block list begins with the section block itself. The
    nesting level comes from the position of the block's underline
    character among the underlines currently in effect. Blocks that
    appear before any section are collected under ('', 0, [...]).
    '''
    underlines = ""
    sections = []
    for block in blocks:
        if block['type'] != 'section':
            if not sections:
                # add an initial empty section
                sections.append(('', 0, []))
            sections[-1][2].append(block)
            continue

        char = block['underline']
        if char not in underlines:
            underlines += char
        depth = underlines.index(char) + 1
        # forget underlines that belong to deeper levels
        underlines = underlines[:depth]
        sections.append((block['lines'][0], depth, [block]))
    return sections
664
668
def decorateblocks(blocks, width):
    '''generate (section name, line text) pairs for search

    The result holds one list per section returned by getsections();
    each of those lists pairs the section name with every line (line
    ending kept) of that section's blocks formatted at ``width``.
    '''
    decorated = []
    for name, _level, secblocks in getsections(blocks):
        formatted = formatblocks(secblocks, width)
        pairs = []
        for line in formatted.splitlines(True):
            pairs.append((name, line))
        decorated.append(pairs)
    return decorated
673
677
def maketable(data, indent=0, header=False):
    '''Generate an RST table for the given table data as a list of lines

    data is a sequence of rows, each a sequence of cell strings. Every
    returned line ends with a newline and starts with ``indent``
    spaces. The table is framed by '=' divider lines; when ``header``
    is true and there is more than one row, another divider is placed
    after the first row.
    '''

    # each column is as wide as its widest cell
    widths = []
    for column in zip(*data):
        widths.append(max([encoding.colwidth(cell) for cell in column]))

    prefix = ' ' * indent
    divider = prefix + ' '.join(['=' * w for w in widths]) + '\n'

    table = [divider]
    for row in data:
        cells = [cell + ' ' * (w - encoding.colwidth(cell))
                 for w, cell in zip(widths, row)]
        table.append(prefix + ' '.join(cells) + "\n")

    if header and len(data) > 1:
        # index 2 is right after the top divider and the first row
        table.insert(2, divider)
    table.append(divider)
    return table
General Comments 0
You need to be logged in to leave comments. Login now