##// END OF EJS Templates
minirst: combine list parsing in one function...
Martin Geisler -
r9737:5f101af4 default
parent child Browse files
Show More
@@ -1,354 +1,272 b''
1 # minirst.py - minimal reStructuredText parser
1 # minirst.py - minimal reStructuredText parser
2 #
2 #
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
6 # GNU General Public License version 2, incorporated herein by reference.
7
7
8 """simplified reStructuredText parser.
8 """simplified reStructuredText parser.
9
9
10 This parser knows just enough about reStructuredText to parse the
10 This parser knows just enough about reStructuredText to parse the
11 Mercurial docstrings.
11 Mercurial docstrings.
12
12
13 It cheats in a major way: nested blocks are not really nested. They
13 It cheats in a major way: nested blocks are not really nested. They
14 are just indented blocks that look like they are nested. This relies
14 are just indented blocks that look like they are nested. This relies
15 on the user to keep the right indentation for the blocks.
15 on the user to keep the right indentation for the blocks.
16
16
17 It only supports a small subset of reStructuredText:
17 It only supports a small subset of reStructuredText:
18
18
19 - paragraphs
19 - paragraphs
20
20
21 - definition lists (must use ' ' to indent definitions)
21 - definition lists (must use ' ' to indent definitions)
22
22
23 - lists (items must start with '-')
23 - lists (items must start with '-')
24
24
25 - field lists (colons cannot be escaped)
25 - field lists (colons cannot be escaped)
26
26
27 - literal blocks
27 - literal blocks
28
28
29 - option lists (supports only long options without arguments)
29 - option lists (supports only long options without arguments)
30
30
31 - inline markup is not recognized at all.
31 - inline markup is not recognized at all.
32 """
32 """
33
33
34 import re, sys, textwrap
34 import re, sys, textwrap
35
35
36
36
def findblocks(text):
    """Split text into blocks of consecutive non-blank lines.

    Returns a list of dictionaries, one per block. Each dictionary has
    an 'indent' field (the smallest leading whitespace width among the
    lines of the block) and a 'lines' field (the lines of the block
    with that common indentation removed).
    """
    groups = []
    current = []
    for line in text.splitlines():
        if line.strip():
            current.append(line)
        elif current:
            # A blank (or whitespace-only) line terminates the block
            # being collected; repeated blank lines are ignored.
            groups.append(current)
            current = []
    if current:
        groups.append(current)

    result = []
    for group in groups:
        margin = min([len(l) - len(l.lstrip()) for l in group])
        result.append(dict(indent=margin, lines=[l[margin:] for l in group]))
    return result
57
57
58
58
def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'. The list is modified in place (a block
    consisting only of '::' is removed, and the trailing '::' marker is
    stripped from other introducing paragraphs) and is also returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block
                del blocks[i]
                i -= 1
            elif blocks[i]['lines'][-1].endswith(' ::'):
                # Partially minimized form: remove space and both
                # colons.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
            else:
                # Fully minimized form: remove just one colon.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            #
            # When the removed '::' block was the very first block, i is
            # now -1. There is no preceding block to inspect in that
            # case; indexing with -1 would silently look at the *last*
            # block instead, so the check is skipped.
            if i >= 0 and blocks[i]['lines'][0].startswith('- '):
                indent += 2
                adjustment -= 2

            # Mark the following indented blocks.
            while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                blocks[i + 1]['type'] = 'literal'
                blocks[i + 1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks
108
108
_bulletre = re.compile(r'- ')
_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):( +)(.*)')
_definitionre = re.compile(r'[^ ]')

def splitparagraphs(blocks):
    """Split paragraphs that are really lists into one block per item.

    Only blocks of type 'paragraph' are examined. When the first line
    of such a block starts a list item, the block is replaced in place
    by one block per item, each carrying the list type in its 'type'
    field and the indentation of the original paragraph. The (modified)
    list of blocks is returned.
    """
    # Tuples with (list type, item regexp, single line items?). Order
    # matters: definition lists have the least specific regexp and must
    # come last.
    listtypes = [('bullet', _bulletre, True),
                 ('option', _optionre, True),
                 ('field', _fieldre, True),
                 ('definition', _definitionre, False)]

    def startsitem(lines, pos, itemre, singleline):
        """Does itemre match the start of a list item at line pos?

        A list item can be followed by an indented line or another
        list item (but only if singleline is True).
        """
        if not itemre.match(lines[pos]):
            return False
        if pos + 1 < len(lines):
            following = lines[pos + 1]
        else:
            following = ''
        if not singleline:
            return following.startswith(' ')
        return (following == '' or following[0] == ' '
                or itemre.match(following))

    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        if block['type'] == 'paragraph':
            lines = block['lines']
            for kind, itemre, singleline in listtypes:
                if not startsitem(lines, 0, itemre, singleline):
                    continue
                items = []
                for j, line in enumerate(lines):
                    if startsitem(lines, j, itemre, singleline):
                        items.append(dict(type=kind, lines=[],
                                          indent=block['indent']))
                    # Lines that do not start an item continue the
                    # most recent one.
                    items[-1]['lines'].append(line)
                blocks[pos:pos + 1] = items
                break
        pos += 1
    return blocks
155
109
156
def findsections(blocks):
    """Mark section titles.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first. A paragraph made of exactly
    two lines, where the second line consists solely of dashes and is
    as long as the first line, gets the type 'section'. The blocks are
    updated in place and returned.
    """
    for block in blocks:
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | Section title                |
        # | -------------                |
        # +------------------------------+
        if block['type'] != 'paragraph':
            continue
        lines = block['lines']
        if len(lines) != 2:
            continue
        title, underline = lines
        if underline == '-' * len(title):
            block['type'] = 'section'
    return blocks
128
175
129
176
def findbulletlists(blocks):
    """Split paragraphs that are bullet lists into one block per item.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first. The leading '- ' marker is
    removed from the first line of every item. The list of blocks is
    modified in place and returned.
    """
    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        # Searching for a paragraph that looks like this:
        #
        # +------+-----------------------+
        # | "- " | list item             |
        # +------| (body elements)+      |
        #        +-----------------------+
        if (block['type'] != 'paragraph'
            or not block['lines'][0].startswith('- ')):
            pos += 1
            continue

        items = []
        for line in block['lines']:
            if line.startswith('- '):
                items.append(dict(type='bullet', lines=[],
                                  indent=block['indent']))
                line = line[2:]
            items[-1]['lines'].append(line)
        blocks[pos:pos + 1] = items
        # Continue after the items that were just inserted.
        pos += len(items)
    return blocks
157
158
_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
def findoptionlists(blocks):
    """Split paragraphs that are option lists into one block per option.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first. Each resulting block gets the
    type 'option' and a 'width' field holding the combined length of
    the option name and of the argument/spacing that follows it. The
    list of blocks is modified in place and returned.
    """
    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        # Searching for a paragraph that looks like this:
        #
        # +----------------------------+-------------+
        # | "--" option "  "           | description |
        # +-------+--------------------+             |
        #         | (body elements)+                 |
        #         +----------------------------------+
        if (block['type'] != 'paragraph'
            or not _optionre.match(block['lines'][0])):
            pos += 1
            continue

        entries = []
        for line in block['lines']:
            found = _optionre.match(line)
            if found:
                width = len(found.group(1)) + len(found.group(2))
                entries.append(dict(type='option', lines=[],
                                    indent=block['indent'],
                                    width=width))
            # Non-matching lines belong to the description of the
            # most recent option.
            entries[-1]['lines'].append(line)
        blocks[pos:pos + 1] = entries
        pos += len(entries)
    return blocks
191
192
_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):( +)(.*)')
def findfieldlists(blocks):
    """Split paragraphs that are field lists into one block per field.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first. Each resulting block gets the
    type 'field' and a 'width' field holding the width of the
    ':name:' marker plus the spaces following it. The two colons are
    replaced by padding so that the field body still starts in column
    'width'. The list of blocks is modified in place and returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a paragraph that looks like this:
        #
        #
        # +--------------------+----------------------+
        # | ":" field name ":" | field body           |
        # +-------+------------+                      |
        #         | (body elements)+                  |
        #         +-----------------------------------+
        if (blocks[i]['type'] == 'paragraph' and
            _fieldre.match(blocks[i]['lines'][0])):
            indent = blocks[i]['indent']
            fields = []
            for line in blocks[i]['lines']:
                m = _fieldre.match(line)
                if m:
                    key, spaces, rest = m.groups()
                    width = 2 + len(key) + len(spaces)
                    fields.append(dict(type='field', lines=[],
                                       indent=indent, width=width))
                    # Turn ":foo: bar" into "foo   bar". Two padding
                    # spaces stand in for the two removed colons, so
                    # the body keeps starting in column 'width'.
                    line = '%s  %s%s' % (key, spaces, rest)
                fields[-1]['lines'].append(line)
            blocks[i:i + 1] = fields
            i += len(fields) - 1
        i += 1
    return blocks
228
229
def finddefinitionlists(blocks):
    """Split paragraphs that are definition lists into one block per term.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first. Each resulting block gets the
    type 'definition' and a 'hang' field holding the indentation of
    its last line. The list of blocks is modified in place and
    returned.
    """
    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        lines = block['lines']
        # Searching for a paragraph that looks like this:
        #
        # +----------------------------+
        # | term                       |
        # +--+-------------------------+--+
        #    | definition                 |
        #    | (body elements)+           |
        #    +----------------------------+
        islist = (block['type'] == 'paragraph' and
                  len(lines) > 1 and
                  not lines[0].startswith(' ') and
                  lines[1].startswith(' '))
        if not islist:
            pos += 1
            continue

        entries = []
        for line in lines:
            if not line.startswith(' '):
                # An unindented line is a new term.
                entries.append(dict(type='definition', lines=[],
                                    indent=block['indent']))
            entries[-1]['lines'].append(line)
            # Overwritten on every line, so the last line wins.
            entries[-1]['hang'] = len(line) - len(line.lstrip())
        blocks[pos:pos + 1] = entries
        pos += len(entries)
    return blocks
261
262
def inlineliterals(blocks):
    """Replace the inline literal marker '``' by '"' in paragraphs.

    Only blocks of type 'paragraph' are touched; their 'lines' field
    is replaced by a new list. The blocks are returned.
    """
    for block in blocks:
        if block['type'] != 'paragraph':
            continue
        converted = []
        for line in block['lines']:
            converted.append(line.replace('``', '"'))
        block['lines'] = converted
    return blocks
268
182
269
183
def addmargins(blocks):
    """Adds empty blocks for vertical spacing.

    This groups bullets, options, fields, and definitions together
    with no vertical space between consecutive blocks of the same
    type, and inserts an empty block of type 'margin' between all
    other neighbouring blocks. The list is modified in place and
    returned.
    """
    grouped = ('bullet', 'option', 'field', 'definition')
    pos = 1
    while pos < len(blocks):
        kind = blocks[pos]['type']
        if kind in grouped and kind == blocks[pos - 1]['type']:
            pos += 1
            continue
        blocks.insert(pos, dict(lines=[''], indent=0, type='margin'))
        # Skip the margin just inserted and the block after it.
        pos += 2
    return blocks
285
199
286
200
def formatblock(block, width):
    """Format a block according to width.

    block is a dictionary with 'type', 'indent' and 'lines' fields and
    width is the maximum line width; a width of zero or less selects
    78 columns. Margin blocks format to the empty string, literal and
    section blocks are emitted line by line without wrapping, and all
    other blocks are re-wrapped with textwrap. The block itself is
    never modified. Returns the formatted text as a string.
    """
    if width <= 0:
        width = 78
    indent = ' ' * block['indent']
    kind = block['type']
    if kind == 'margin':
        return ''
    if kind == 'literal':
        indent += ' '
        return indent + ('\n' + indent).join(block['lines'])
    if kind == 'section':
        return indent + ('\n' + indent).join(block['lines'])
    if kind == 'definition':
        term = indent + block['lines'][0]
        # The definition text hangs at the indentation of its last line.
        last = block['lines'][-1]
        hang = len(last) - len(last.lstrip())
        defindent = indent + hang * ' '
        text = ' '.join([l.strip() for l in block['lines'][1:]])
        return "%s\n%s" % (term, textwrap.fill(text, width=width,
                                               initial_indent=defindent,
                                               subsequent_indent=defindent))

    # Work on a copy: rewriting the field marker below must not leak
    # into the caller's block, otherwise formatting the same block a
    # second time would no longer recognise it as a field.
    lines = list(block['lines'])
    initindent = subindent = indent
    if kind == 'bullet':
        # Continuation lines line up with the text after the "- " marker.
        subindent = indent + len('- ') * ' '
    elif kind == 'field':
        m = _fieldre.match(lines[0])
        if m:
            key, spaces, rest = m.groups()
            # Turn ":foo: bar" into "foo   bar": two padding spaces
            # replace the two colons so that the body still starts in
            # the column used for the hanging indent below.
            lines[0] = '%s  %s%s' % (key, spaces, rest)
            subindent = indent + (2 + len(key) + len(spaces)) * ' '
    elif kind == 'option':
        m = _optionre.match(lines[0])
        if m:
            option, arg, rest = m.groups()
            subindent = indent + (len(option) + len(arg)) * ' '

    text = ' '.join([l.strip() for l in lines])
    return textwrap.fill(text, width=width,
                         initial_indent=initindent,
                         subsequent_indent=subindent)
317
241
318
242
def format(text, width, indent=0):
    """Parse and format the text according to width.

    The text is split into blocks, every block is shifted right by
    indent columns, the blocks are classified, and each one is then
    formatted with formatblock. Returns the formatted blocks joined
    by newlines.
    """
    blocks = findblocks(text)
    for b in blocks:
        b['indent'] += indent
    # The passes depend on each other's results and must run in this
    # order.
    passes = (findliteralblocks, inlineliterals, splitparagraphs,
              findsections, addmargins)
    for step in passes:
        blocks = step(blocks)
    return '\n'.join([formatblock(b, width) for b in blocks])
333
254
334
255
if __name__ == "__main__":
    # Debugging aid: run every parsing pass on the file named by the
    # first command line argument, dump the blocks after each pass,
    # and finally print the text formatted for 30 columns.
    from pprint import pprint

    def debug(func, blocks):
        """Apply func to blocks, dump the result, and return it."""
        blocks = func(blocks)
        # A single parenthesised argument prints the same under the
        # print statement and the print function.
        print("*** after %s:" % func.__name__)
        pprint(blocks)
        print("")
        return blocks

    # Close the input file explicitly instead of leaking the handle.
    fp = open(sys.argv[1])
    try:
        text = fp.read()
    finally:
        fp.close()
    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    blocks = debug(splitparagraphs, blocks)
    blocks = debug(findsections, blocks)
    blocks = debug(addmargins, blocks)
    print('\n'.join([formatblock(b, 30) for b in blocks]))
General Comments 0
You need to be logged in to leave comments. Login now