##// END OF EJS Templates
minirst: removed unnecessary initindent variable
Martin Geisler -
r10937:a9d5943d default
parent child Browse files
Show More
@@ -1,366 +1,366
1 # minirst.py - minimal reStructuredText parser
1 # minirst.py - minimal reStructuredText parser
2 #
2 #
3 # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """simplified reStructuredText parser.
8 """simplified reStructuredText parser.
9
9
10 This parser knows just enough about reStructuredText to parse the
10 This parser knows just enough about reStructuredText to parse the
11 Mercurial docstrings.
11 Mercurial docstrings.
12
12
13 It cheats in a major way: nested blocks are not really nested. They
13 It cheats in a major way: nested blocks are not really nested. They
14 are just indented blocks that look like they are nested. This relies
14 are just indented blocks that look like they are nested. This relies
15 on the user to keep the right indentation for the blocks.
15 on the user to keep the right indentation for the blocks.
16
16
17 It only supports a small subset of reStructuredText:
17 It only supports a small subset of reStructuredText:
18
18
19 - sections
19 - sections
20
20
21 - paragraphs
21 - paragraphs
22
22
23 - literal blocks
23 - literal blocks
24
24
25 - definition lists
25 - definition lists
26
26
27 - bullet lists (items must start with '-')
27 - bullet lists (items must start with '-')
28
28
29 - enumerated lists (no autonumbering)
29 - enumerated lists (no autonumbering)
30
30
31 - field lists (colons cannot be escaped)
31 - field lists (colons cannot be escaped)
32
32
33 - option lists (supports only long options without arguments)
33 - option lists (supports only long options without arguments)
34
34
35 - inline literals (no other inline markup is recognized)
35 - inline literals (no other inline markup is recognized)
36 """
36 """
37
37
38 import re, sys, textwrap
38 import re, sys, textwrap
39
39
40
40
def findblocks(text):
    """Split text into runs of consecutive non-blank lines.

    Returns a list of dictionaries, one per run. Each dictionary has
    an 'indent' field (the smallest amount of leading whitespace found
    on any line of the run) and a 'lines' field (the lines of the run
    with that many leading characters removed).
    """
    groups = []
    current = []
    for line in text.splitlines():
        if line.strip():
            current.append(line)
        elif current:
            # A blank line closes the run collected so far; further
            # blank lines in a row are ignored.
            groups.append(current)
            current = []
    if current:
        groups.append(current)

    result = []
    for group in groups:
        margin = min((len(l) - len(l.lstrip())) for l in group)
        result.append({'indent': margin,
                       'lines': [l[margin:] for l in group]})
    return result
61
61
62
62
def findliteralblocks(blocks):
    """Find literal blocks and add a 'type' field to the blocks.

    A block whose last line ends with '::' introduces the more deeply
    indented blocks that follow it; those are given the type
    'literal'. All other blocks are given the type 'paragraph'. The
    list is modified in place and returned.
    """
    pos = 0
    while pos < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        blocks[pos]['type'] = 'paragraph'
        lastline = blocks[pos]['lines'][-1]
        if lastline.endswith('::') and pos + 1 < len(blocks):
            base = blocks[pos]['indent']
            shift = blocks[pos + 1]['indent'] - base

            if blocks[pos]['lines'] == ['::']:
                # Expanded form: the marker stands alone, so the whole
                # block goes away and pos now refers to the block
                # before it.
                del blocks[pos]
                pos -= 1
            elif lastline.endswith(' ::'):
                # Partially minimized form: drop the space and both
                # colons.
                blocks[pos]['lines'][-1] = lastline[:-3]
            else:
                # Fully minimized form: drop just one colon.
                blocks[pos]['lines'][-1] = lastline[:-1]

            # List items are formatted with a hanging indent. This has
            # to be corrected for here, while the original indentation
            # of the following literal blocks is still available.
            bullet = _bulletre.match(blocks[pos]['lines'][0])
            if bullet:
                base += bullet.end()
                shift -= bullet.end()

            # Mark the following indented blocks and leave pos on the
            # last one of them.
            nxt = pos + 1
            while nxt < len(blocks) and blocks[nxt]['indent'] > base:
                blocks[nxt]['type'] = 'literal'
                blocks[nxt]['indent'] -= shift
                nxt += 1
            pos = nxt - 1
        pos += 1
    return blocks
113
113
# Start of a list item: "-", "1." / "a.", "(1)" / "1)" or "|" followed
# by a space.
_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')
# Long option, optional argument plus trailing spaces, description.
# NOTE(review): this text was recovered from a rendering that collapsed
# runs of spaces; confirm the " +" below against the repository copy.
_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
# ":key: rest" field; the key contains no colon.
_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')
# Any line that does not start with a space.
_definitionre = re.compile(r'[^ ]')

def splitparagraphs(blocks):
    """Split paragraphs into list items.

    Every 'paragraph' block whose first line starts a bullet, option,
    field or definition list item is replaced, in place, by one block
    per item. The new blocks carry the list type as 'type', the
    paragraph's 'indent' and the item's own 'lines'.
    """
    # Tuples of (list type, item regexp, single line items?). Order
    # matters: the definition regexp is the least specific one and
    # must be tried last.
    candidates = (('bullet', _bulletre, True),
                  ('option', _optionre, True),
                  ('field', _fieldre, True),
                  ('definition', _definitionre, False))

    def startsitem(lines, pos, itemre, singleline):
        """Does itemre match an item at line pos?

        A list item can be followed by an indented line or, only if
        singleline is true, by the end of the block or another item.
        """
        if not itemre.match(lines[pos]):
            return False
        following = ''
        if pos + 1 < len(lines):
            following = lines[pos + 1]
        if not singleline:
            return following.startswith(' ')
        return (following == '' or following[0] == ' '
                or itemre.match(following))

    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        if block['type'] == 'paragraph':
            lines = block['lines']
            for listtype, itemre, singleline in candidates:
                if not startsitem(lines, 0, itemre, singleline):
                    continue
                items = []
                for n, line in enumerate(lines):
                    if startsitem(lines, n, itemre, singleline):
                        items.append({'type': listtype, 'lines': [],
                                      'indent': block['indent']})
                    # Line 0 always starts an item, so items is never
                    # empty here.
                    items[-1]['lines'].append(line)
                blocks[pos:pos + 1] = items
                break
        pos += 1
    return blocks
160
160
161
161
# Column width reserved for the key when formatting field lists.
_fieldwidth = 12

def updatefieldlists(blocks):
    """Find the key and the maximum key width for field lists.

    For every run of consecutive 'field' blocks, the key is split off
    the first line of each block and stored under 'key' (the first
    line keeps only the text after the key), and the length of the
    longest key in the run is stored under 'keywidth' on every block
    of the run. The list is modified in place and returned.
    """
    pos = 0
    while pos < len(blocks):
        if blocks[pos]['type'] != 'field':
            pos += 1
            continue

        widest = 0
        end = pos
        while end < len(blocks) and blocks[end]['type'] == 'field':
            item = blocks[end]
            key, rest = _fieldre.match(item['lines'][0]).groups()
            item['key'] = key
            item['lines'][0] = rest
            widest = max(widest, len(key))
            end += 1

        for item in blocks[pos:end]:
            item['keywidth'] = widest
        # blocks[end], if any, is known not to be a field block, so it
        # can be stepped over as well.
        pos = end + 1

    return blocks
187
187
188
188
def prunecontainers(blocks, keep):
    """Prune unwanted containers.

    A container is a paragraph block whose first line starts with
    '.. container::'. Whatever follows the first 15 characters of that
    line is the container type, and the more deeply indented blocks
    that come after it are its contents.

    The container block itself is always removed. If the type is
    listed in keep, the contents stay and are shifted left by the
    distance between the container and the block right after it.
    Otherwise the contents are removed and the type is recorded.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first. The list is modified in
    place. A container directive that is the very last block is left
    untouched because the scan stops one block before the end.

    Returns a (blocks, pruned) tuple, where pruned lists the types of
    the containers whose contents were removed, in document order.
    """
    pruned = []
    i = 0
    while i + 1 < len(blocks):
        # Searching for a block that looks like this:
        #
        # +-------+---------------------------+
        # | ".. container ::" type            |
        # +---+                               |
        #     | blocks                        |
        #     +-------------------------------+
        if (blocks[i]['type'] == 'paragraph' and
            blocks[i]['lines'][0].startswith('.. container::')):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent
            containertype = blocks[i]['lines'][0][15:]
            prune = containertype not in keep
            if prune:
                pruned.append(containertype)

            # Always delete "..container:: type" block
            del blocks[i]
            j = i
            # The block that now sits at index i has not been looked
            # at yet and may itself be a container. Step back exactly
            # once to cancel the increment at the bottom of the loop;
            # stepping back once per pruned block could drive the
            # index negative and make it wrap around the list.
            i -= 1
            while j < len(blocks) and blocks[j]['indent'] > indent:
                if prune:
                    del blocks[j]
                else:
                    blocks[j]['indent'] -= adjustment
                    j += 1
        i += 1
    return blocks, pruned
226
226
227
227
def findsections(blocks):
    """Find section titles.

    A 'paragraph' block of exactly two lines, whose second line
    consists only of dashes and is as long as the first line, is given
    the type 'section'.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    for block in blocks:
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | Section title                |
        # | -------------                |
        # +------------------------------+
        if block['type'] != 'paragraph':
            continue
        lines = block['lines']
        if len(lines) != 2:
            continue
        title, underline = lines
        if underline == '-' * len(title):
            block['type'] = 'section'
    return blocks
246
246
247
247
def inlineliterals(blocks):
    """Replace every pair of backquotes in paragraph blocks by '"'.

    Only blocks of type 'paragraph' are touched; their 'lines' field
    is rebound to a new list. The block list itself is returned.
    """
    for block in blocks:
        if block['type'] != 'paragraph':
            continue
        converted = []
        for line in block['lines']:
            converted.append(line.replace('``', '"'))
        block['lines'] = converted
    return blocks
253
253
254
254
def addmargins(blocks):
    """Add empty blocks for vertical spacing.

    Consecutive blocks of the same type are kept together, with no
    vertical space between them, when that type is bullet, option or
    field. An empty 'margin' block is inserted between all other
    neighbouring blocks. The list is modified in place and returned.
    """
    compact = ('bullet', 'option', 'field')
    pos = 1
    while pos < len(blocks):
        kind = blocks[pos]['type']
        previous = blocks[pos - 1]['type']
        if kind == previous and kind in compact:
            pos += 1
            continue
        blocks.insert(pos, {'lines': [''], 'indent': 0, 'type': 'margin'})
        # Step over both the new margin and the block it precedes.
        pos += 2
    return blocks
270
270
271
271
def formatblock(block, width):
    """Format a block according to width.

    A width of zero or less is replaced by 78. Margin blocks give an
    empty string. Literal and section blocks are indented line by line
    without rewrapping. Every other block has its lines stripped,
    joined with single spaces and rewrapped with textwrap.fill, using
    a hanging indent for list items.

    For line blocks ("| " bullets) and field items the first entry of
    block['lines'] is rewritten in place.
    """
    if width <= 0:
        width = 78
    indent = ' ' * block['indent']
    kind = block['type']
    if kind == 'margin':
        return ''

    lines = block['lines']
    if kind in ('literal', 'section'):
        if kind == 'literal':
            # NOTE(review): recovered from a rendering that collapsed
            # runs of spaces; confirm the width of this extra indent.
            indent += ' '
        return indent + ('\n' + indent).join(lines)

    if kind == 'definition':
        term = indent + lines[0]
        # The definition body is indented like the last line of the
        # block.
        lastline = lines[-1]
        hang = len(lastline) - len(lastline.lstrip())
        defindent = indent + hang * ' '
        body = ' '.join([str.strip(l) for l in lines[1:]])
        wrapped = textwrap.fill(body, width=width,
                                initial_indent=defindent,
                                subsequent_indent=defindent)
        return "%s\n%s" % (term, wrapped)

    subindent = indent
    if kind == 'bullet':
        if lines[0].startswith('| '):
            # Remove the bullet for line blocks and add no extra
            # indentation.
            lines[0] = lines[0][2:]
        else:
            # Continuation lines are aligned with the item text.
            subindent = indent + _bulletre.match(lines[0]).end() * ' '
    elif kind == 'field':
        keywidth = block['keywidth']
        key = block['key']

        subindent = indent + _fieldwidth * ' '
        if len(key) + 2 > _fieldwidth:
            # key too large, pad it to the full line width
            key = key.ljust(width)
        elif keywidth + 2 < _fieldwidth:
            # all keys are small, add only two spaces
            key = key.ljust(keywidth + 2)
            subindent = indent + (keywidth + 2) * ' '
        else:
            # mixed sizes, use fieldwidth for this one
            key = key.ljust(_fieldwidth)
        lines[0] = key + lines[0]
    elif kind == 'option':
        option, arg, rest = _optionre.match(lines[0]).groups()
        subindent = indent + (len(option) + len(arg)) * ' '

    text = ' '.join([str.strip(l) for l in lines])
    return textwrap.fill(text, width=width,
                         initial_indent=indent,
                         subsequent_indent=subindent)
326
326
327
327
def format(text, width, indent=0, keep=None):
    """Parse and format the text according to width.

    indent is added to the indentation of every block found in text.
    keep lists the container types whose contents should be kept.

    Returns the formatted text when keep is None, and otherwise a
    (text, pruned) tuple where pruned lists the container types that
    were removed.
    """
    wanted = keep or []
    blocks = findblocks(text)
    for block in blocks:
        block['indent'] += indent
    blocks, pruned = prunecontainers(findliteralblocks(blocks), wanted)
    # The remaining passes each take and return the block list.
    for step in (inlineliterals, splitparagraphs, updatefieldlists,
                 findsections, addmargins):
        blocks = step(blocks)
    formatted = '\n'.join([formatblock(b, width) for b in blocks])
    if keep is None:
        return formatted
    return formatted, pruned
345
345
346
346
if __name__ == "__main__":
    # Debugging driver: run every parsing pass on the file named by the
    # first argument, dumping the result of each pass, then print the
    # text formatted to a width of 30. Any further arguments are the
    # container types to keep.
    from pprint import pprint

    def debug(func, *args):
        """Call func(*args), dump its result to stdout and return it."""
        result = func(*args)
        # sys.stdout.write is used instead of the print statement so
        # that this block parses under both Python 2 and Python 3.
        sys.stdout.write("*** after %s:\n" % func.__name__)
        pprint(result)
        sys.stdout.write("\n")
        return result

    fp = open(sys.argv[1])
    try:
        text = fp.read()
    finally:
        fp.close()
    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    # prunecontainers returns a (blocks, pruned) pair; only the block
    # list may be handed to the passes that follow.
    blocks, pruned = debug(prunecontainers, blocks, sys.argv[2:])
    blocks = debug(inlineliterals, blocks)
    blocks = debug(splitparagraphs, blocks)
    blocks = debug(updatefieldlists, blocks)
    blocks = debug(findsections, blocks)
    blocks = debug(addmargins, blocks)
    sys.stdout.write('\n'.join(formatblock(b, 30) for b in blocks) + '\n')
General Comments 0
You need to be logged in to leave comments. Login now