##// END OF EJS Templates
minirst: simplify bullet list indentation computation
Martin Geisler -
r9292:01e58014 default
parent child Browse files
Show More
@@ -1,302 +1,302 b''
1 # minirst.py - minimal reStructuredText parser
1 # minirst.py - minimal reStructuredText parser
2 #
2 #
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
6 # GNU General Public License version 2, incorporated herein by reference.
7
7
8 """simplified reStructuredText parser.
8 """simplified reStructuredText parser.
9
9
10 This parser knows just enough about reStructuredText to parse the
10 This parser knows just enough about reStructuredText to parse the
11 Mercurial docstrings.
11 Mercurial docstrings.
12
12
13 It cheats in a major way: nested blocks are not really nested. They
13 It cheats in a major way: nested blocks are not really nested. They
14 are just indented blocks that look like they are nested. This relies
14 are just indented blocks that look like they are nested. This relies
15 on the user to keep the right indentation for the blocks.
15 on the user to keep the right indentation for the blocks.
16
16
17 It only supports a small subset of reStructuredText:
17 It only supports a small subset of reStructuredText:
18
18
19 - paragraphs
19 - paragraphs
20
20
21 - definition lists (must use ' ' to indent definitions)
21 - definition lists (must use ' ' to indent definitions)
22
22
23 - lists (items must start with '-')
23 - lists (items must start with '-')
24
24
25 - literal blocks
25 - literal blocks
26
26
27 - option lists (supports only long options without arguments)
27 - option lists (supports only long options without arguments)
28
28
29 - inline markup is not recognized at all.
29 - inline markup is not recognized at all.
30 """
30 """
31
31
32 import re, sys, textwrap
32 import re, sys, textwrap
33
33
34
34
def findblocks(text):
    """Split text into blocks of consecutive non-blank lines.

    Blank (whitespace-only) lines separate the blocks and are dropped.
    Returns a list of dictionaries, one per block. Each dictionary has
    an 'indent' field (the smallest leading-whitespace width found in
    the block) and a 'lines' field (the lines with that many leading
    characters removed).
    """
    groups = []
    current = []
    for line in text.splitlines():
        if line.strip():
            current.append(line)
        elif current:
            # A blank line closes the block collected so far.
            groups.append(current)
            current = []
    if current:
        groups.append(current)

    result = []
    for group in groups:
        margin = min(len(l) - len(l.lstrip()) for l in group)
        result.append({'indent': margin,
                       'lines': [l[margin:] for l in group]})
    return result
55
55
56
56
def findliteralblocks(blocks):
    """Find literal blocks and add a 'type' field to the blocks.

    A paragraph whose last line ends with "::" introduces a literal
    block, provided another block follows it. The following blocks that
    are indented deeper than the introducing paragraph are given the
    type 'literal'; all other blocks are given the type 'paragraph'.

    The "::" marker is rewritten depending on its form:

    - expanded form (a paragraph that is just "::"): the paragraph is
      removed from the list,

    - partially minimized form ("text ::"): the space and both colons
      are removed,

    - fully minimized form ("text::"): one colon is removed.

    The list is modified in place and is also returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i+1 < len(blocks):
            indent = blocks[i]['indent']
            adjustment = blocks[i+1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block. After this, i refers to
                # the block before the marker, or is -1 when the marker
                # was the first block, so the list-item check below
                # must not be applied: blocks[i] is not the paragraph
                # that introduced the literal block (and blocks[-1]
                # would silently be the last block of the text).
                del blocks[i]
                i -= 1
            else:
                if blocks[i]['lines'][-1].endswith(' ::'):
                    # Partially minimized form: remove space and both
                    # colons.
                    blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
                else:
                    # Fully minimized form: remove just one colon.
                    blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

                # List items are formatted with a hanging indent. We
                # must correct for this here while we still have the
                # original information on the indentation of the
                # subsequent literal blocks available.
                if blocks[i]['lines'][0].startswith('- '):
                    indent += 2
                    adjustment -= 2

            # Mark the following indented blocks.
            while i+1 < len(blocks) and blocks[i+1]['indent'] > indent:
                blocks[i+1]['type'] = 'literal'
                blocks[i+1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks
106
106
107
107
def findsections(blocks):
    """Mark section titles.

    A paragraph of exactly two lines whose second line consists of as
    many '-' characters as the first line is long gets the type
    'section'.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first. The list is modified in place
    and also returned.
    """
    # Looking for a paragraph shaped like this:
    #
    # +------------------------------+
    # | Section title                |
    # | -------------                |
    # +------------------------------+
    for candidate in blocks:
        if candidate['type'] != 'paragraph':
            continue
        lines = candidate['lines']
        if len(lines) != 2:
            continue
        title, underline = lines
        if underline == '-' * len(title):
            candidate['type'] = 'section'
    return blocks
126
126
127
127
def findbulletlists(blocks):
    """Split bullet-list paragraphs into one block per list item.

    A paragraph whose first line starts with '- ' is replaced by blocks
    of type 'bullet'. Every line starting with '- ' opens a new item
    (with the marker removed); other lines are added to the current
    item unchanged. Each item keeps the indent of the paragraph.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first. The list is modified in place
    and also returned.
    """
    # Looking for a paragraph shaped like this:
    #
    # +------+-----------------------+
    # | "- " | list item             |
    # +------| (body elements)+      |
    #        +-----------------------+
    pos = 0
    while pos < len(blocks):
        para = blocks[pos]
        if (para['type'] != 'paragraph' or
            not para['lines'][0].startswith('- ')):
            pos += 1
            continue

        bullets = []
        for text in para['lines']:
            if text.startswith('- '):
                bullets.append({'type': 'bullet', 'lines': [],
                                'indent': para['indent']})
                text = text[2:]
            bullets[-1]['lines'].append(text)

        # Replace the paragraph with its items and continue after them.
        blocks[pos:pos+1] = bullets
        pos += len(bullets)
    return blocks
155
155
156
156
# Matches "--option", an optional " ARG" or "=ARG", the spaces that
# separate it from the description, and the description itself.
# NOTE(review): the view this was taken from appears to collapse runs
# of spaces; confirm against the repository whether the separator
# before "+" is meant to be one space or two.
_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
def findoptionlists(blocks):
    """Split option-list paragraphs into one block per option.

    A paragraph whose first line matches _optionre is replaced by
    blocks of type 'option'. Every matching line opens a new option
    block; other lines are added to the current one. Each option block
    keeps the paragraph indent and records in 'width' the length of the
    option, its argument and the separating spaces.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first. The list is modified in place
    and also returned.
    """
    # Looking for a paragraph shaped like this:
    #
    # +----------------------------+-------------+
    # | "--" option " "            | description |
    # +-------+--------------------+             |
    #         | (body elements)+                 |
    #         +----------------------------------+
    pos = 0
    while pos < len(blocks):
        para = blocks[pos]
        if (para['type'] != 'paragraph' or
            not _optionre.match(para['lines'][0])):
            pos += 1
            continue

        entries = []
        for text in para['lines']:
            found = _optionre.match(text)
            if found:
                option, arg, rest = found.groups()
                entries.append({'type': 'option', 'lines': [],
                                'indent': para['indent'],
                                'width': len(option) + len(arg)})
            entries[-1]['lines'].append(text)

        # Replace the paragraph with its options and continue after them.
        blocks[pos:pos+1] = entries
        pos += len(entries)
    return blocks
189
189
190
190
def finddefinitionlists(blocks):
    """Split definition-list paragraphs into one block per term.

    A paragraph with more than one line, whose first line does not
    start with the indentation string and whose second line does, is
    replaced by blocks of type 'definition'. Every unindented line
    opens a new definition block; indented lines are added to the
    current one. The 'hang' field of each block is the leading
    whitespace width of its last line.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first. The list is modified in place
    and also returned.
    """
    # Looking for a paragraph shaped like this:
    #
    # +----------------------------+
    # | term                       |
    # +--+-------------------------+--+
    #    | definition                 |
    #    | (body elements)+           |
    #    +----------------------------+
    # NOTE(review): the view this was taken from appears to collapse
    # runs of spaces; confirm whether the indentation marker tested
    # below is meant to be one space or two.
    pos = 0
    while pos < len(blocks):
        para = blocks[pos]
        lines = para['lines']
        isdeflist = (para['type'] == 'paragraph' and
                     len(lines) > 1 and
                     not lines[0].startswith(' ') and
                     lines[1].startswith(' '))
        if not isdeflist:
            pos += 1
            continue

        entries = []
        for text in lines:
            if not text.startswith(' '):
                entries.append({'type': 'definition', 'lines': [],
                                'indent': para['indent']})
            entries[-1]['lines'].append(text)

        # The hanging indent is taken from the last line of each entry.
        for entry in entries:
            last = entry['lines'][-1]
            entry['hang'] = len(last) - len(last.lstrip())

        blocks[pos:pos+1] = entries
        pos += len(entries)
    return blocks
222
222
223
223
def addmargins(blocks):
    """Insert empty 'margin' blocks for vertical spacing.

    Consecutive blocks of the same type are kept together without
    vertical space when that type is bullet, option or definition. A
    margin block is inserted between every other pair of neighbouring
    blocks. The list is modified in place and also returned.
    """
    grouped = ('bullet', 'option', 'definition')
    spaced = []
    previous = None
    for block in blocks:
        if previous is not None:
            kind = block['type']
            if not (kind == previous['type'] and kind in grouped):
                spaced.append({'lines': [''], 'indent': 0,
                               'type': 'margin'})
        spaced.append(block)
        previous = block
    # Keep the caller's list object, as insertion in place would.
    blocks[:] = spaced
    return blocks
239
239
240
240
def formatblock(block, width):
    """Format a block according to width.

    block is a dictionary with 'type', 'indent' and 'lines' fields;
    'definition' blocks also need 'hang' and 'option' blocks need
    'width'. Returns the formatted text as a string:

    - margin blocks become the empty string,

    - literal and section blocks are indented but not rewrapped,

    - definition blocks keep the term on its own line, followed by the
      definition rewrapped at the hanging indent,

    - all other blocks are rewrapped to width; bullet items get a '- '
      marker and option blocks get a hanging indent of block['width'].
    """
    indent = ' ' * block['indent']
    if block['type'] == 'margin':
        return ''
    elif block['type'] == 'literal':
        # NOTE(review): the view this was taken from appears to
        # collapse runs of spaces; confirm how many columns a literal
        # block is meant to be indented by.
        indent += ' '
        return indent + ('\n' + indent).join(block['lines'])
    elif block['type'] == 'section':
        return indent + ('\n' + indent).join(block['lines'])
    elif block['type'] == 'definition':
        term = indent + block['lines'][0]
        defindent = indent + block['hang'] * ' '
        text = ' '.join([l.strip() for l in block['lines'][1:]])
        return "%s\n%s" % (term, textwrap.fill(text, width=width,
                                               initial_indent=defindent,
                                               subsequent_indent=defindent))
    else:
        initindent = subindent = indent
        text = ' '.join([l.strip() for l in block['lines']])
        if block['type'] == 'bullet':
            initindent = indent + '- '
            # Continuation lines must line up with the text after the
            # two-column '- ' marker.
            subindent = indent + '  '
        elif block['type'] == 'option':
            subindent = indent + block['width'] * ' '

        return textwrap.fill(text, width=width,
                             initial_indent=initindent,
                             subsequent_indent=subindent)
270
270
271
271
def format(text, width):
    """Parse text and return it formatted according to width.

    The text is split into blocks, the blocks are run through each
    classification pass in turn, margins are added, and the formatted
    blocks are joined with newlines.
    """
    passes = (findliteralblocks,
              findsections,
              findbulletlists,
              findoptionlists,
              finddefinitionlists,
              addmargins)
    blocks = findblocks(text)
    for step in passes:
        blocks = step(blocks)
    return '\n'.join([formatblock(b, width) for b in blocks])
282
282
283
283
if __name__ == "__main__":
    # Debugging aid: parse the file named by the first command line
    # argument, dump the block list after every pass, and print the
    # result formatted to a width of 30 columns.
    from pprint import pprint

    def debug(func, blocks):
        """Apply func to blocks, print the result, and return it."""
        blocks = func(blocks)
        print("*** after %s:" % func.__name__)
        pprint(blocks)
        print("")
        return blocks

    # Close the input file explicitly instead of leaking the handle.
    fp = open(sys.argv[1])
    try:
        text = fp.read()
    finally:
        fp.close()
    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    blocks = debug(findsections, blocks)
    blocks = debug(findbulletlists, blocks)
    blocks = debug(findoptionlists, blocks)
    blocks = debug(finddefinitionlists, blocks)
    blocks = debug(addmargins, blocks)
    print('\n'.join(formatblock(b, 30) for b in blocks))
General Comments 0
You need to be logged in to leave comments. Login now