##// END OF EJS Templates
minirst: convert ``foo`` into "foo" upon display...
Martin Geisler -
r9623:32727ce0 default
parent child Browse files
Show More
@@ -1,347 +1,355 b''
1 # minirst.py - minimal reStructuredText parser
1 # minirst.py - minimal reStructuredText parser
2 #
2 #
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
6 # GNU General Public License version 2, incorporated herein by reference.
7
7
8 """simplified reStructuredText parser.
8 """simplified reStructuredText parser.
9
9
10 This parser knows just enough about reStructuredText to parse the
10 This parser knows just enough about reStructuredText to parse the
11 Mercurial docstrings.
11 Mercurial docstrings.
12
12
13 It cheats in a major way: nested blocks are not really nested. They
13 It cheats in a major way: nested blocks are not really nested. They
14 are just indented blocks that look like they are nested. This relies
14 are just indented blocks that look like they are nested. This relies
15 on the user to keep the right indentation for the blocks.
15 on the user to keep the right indentation for the blocks.
16
16
17 It only supports a small subset of reStructuredText:
17 It only supports a small subset of reStructuredText:
18
18
19 - paragraphs
19 - paragraphs
20
20
21 - definition lists (must use ' ' to indent definitions)
21 - definition lists (must use ' ' to indent definitions)
22
22
23 - lists (items must start with '-')
23 - lists (items must start with '-')
24
24
25 - field lists (colons cannot be escaped)
25 - field lists (colons cannot be escaped)
26
26
27 - literal blocks
27 - literal blocks
28
28
29 - option lists (supports only long options without arguments)
29 - option lists (supports only long options without arguments)
30
30
31 - inline literals (``foo`` is displayed as "foo"); no other inline markup is recognized.
31 - inline literals (``foo`` is displayed as "foo"); no other inline markup is recognized.
32 """
32 """
33
33
34 import re, sys, textwrap
34 import re, sys, textwrap
35
35
36
36
def findblocks(text):
    """Split text into blocks of consecutive non-blank lines.

    Lines that are empty or contain only whitespace separate blocks
    and are dropped. Returns a list of dictionaries, one per block,
    each with two fields:

    - 'indent': the smallest number of leading whitespace characters
      found on any line of the block
    - 'lines': the lines of the block with that many leading
      characters removed
    """
    groups = []
    current = []
    for raw in text.splitlines():
        if raw.strip():
            current.append(raw)
        elif current:
            # A blank line closes the block we were collecting.
            groups.append(current)
            current = []
    if current:
        groups.append(current)

    result = []
    for group in groups:
        margin = min(len(raw) - len(raw.lstrip()) for raw in group)
        result.append({'indent': margin,
                       'lines': [raw[margin:] for raw in group]})
    return result
57
57
58
58
def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'.

    A paragraph whose last line ends with "::" introduces the more
    deeply indented blocks that follow it as literal blocks. The "::"
    marker is removed from the paragraph (a paragraph consisting only
    of "::" is deleted), and the 'indent' of every literal block is
    reduced so that it is relative to the introducing paragraph.

    The list is modified in place and also returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i+1 < len(blocks):
            indent = blocks[i]['indent']
            adjustment = blocks[i+1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block
                del blocks[i]
                i -= 1
            elif blocks[i]['lines'][-1].endswith(' ::'):
                # Partially minimized form: remove space and both
                # colons.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
            else:
                # Fully minimized form: remove just one colon.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            #
            # i is -1 when the very first block was a lone "::" that
            # has just been deleted. There is no preceding block then,
            # and blocks[-1] would wrongly inspect the *last* block.
            if i >= 0 and blocks[i]['lines'][0].startswith('- '):
                indent += 2
                adjustment -= 2

            # Mark the following indented blocks.
            while i+1 < len(blocks) and blocks[i+1]['indent'] > indent:
                blocks[i+1]['type'] = 'literal'
                blocks[i+1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks
108
108
109
109
def findsections(blocks):
    """Mark section titles by setting their 'type' to 'section'.

    A section is a paragraph block of exactly two lines where the
    second line consists of '-' characters and is exactly as long as
    the first line:

      Section title
      -------------

    The blocks must already carry a 'type' field, i.e., they should
    have been run through findliteralblocks first. The blocks are
    updated in place and the same list is returned.
    """
    for candidate in blocks:
        if candidate['type'] != 'paragraph':
            continue
        rows = candidate['lines']
        if len(rows) != 2:
            continue
        title, underline = rows
        if underline == '-' * len(title):
            candidate['type'] = 'section'
    return blocks
128
128
129
129
def findbulletlists(blocks):
    """Split bullet-list paragraphs into one 'bullet' block per item.

    A paragraph whose first line starts with "- " is treated as a
    bullet list. Every line starting with "- " opens a new item (the
    two marker characters are removed); all other lines are appended
    unchanged to the item opened most recently. Each item inherits
    the 'indent' of the paragraph it came from.

    The blocks must already carry a 'type' field, i.e., they should
    have been run through findliteralblocks first. The list is
    modified in place and also returned.
    """
    pos = 0
    while pos < len(blocks):
        current = blocks[pos]
        rows = current['lines']
        if current['type'] != 'paragraph' or not rows[0].startswith('- '):
            pos += 1
            continue

        bullets = []
        for row in rows:
            if row.startswith('- '):
                bullets.append({'type': 'bullet', 'lines': [],
                                'indent': current['indent']})
                row = row[2:]
            bullets[-1]['lines'].append(row)

        # Replace the paragraph with its items and step past them.
        blocks[pos:pos + 1] = bullets
        pos += len(bullets)
    return blocks
157
157
158
158
_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
def findoptionlists(blocks):
    """Split option-list paragraphs into one 'option' block per option.

    A paragraph whose first line matches _optionre (a "--long-option",
    an optional argument introduced by a space or "=", one or more
    spaces, then the description) is treated as an option list. Every
    matching line opens a new option block; non-matching lines are
    appended to the option opened most recently. Lines are stored
    unchanged.

    Each option block gets the 'indent' of the source paragraph and a
    'width' field holding the length of the option text plus the
    argument/spacing that precedes the description.

    The blocks must already carry a 'type' field, i.e., they should
    have been run through findliteralblocks first. The list is
    modified in place and also returned.
    """
    pos = 0
    while pos < len(blocks):
        current = blocks[pos]
        rows = current['lines']
        if current['type'] != 'paragraph' or not _optionre.match(rows[0]):
            pos += 1
            continue

        entries = []
        for row in rows:
            found = _optionre.match(row)
            if found:
                flag, argument = found.group(1, 2)
                entries.append({'type': 'option', 'lines': [],
                                'indent': current['indent'],
                                'width': len(flag) + len(argument)})
            entries[-1]['lines'].append(row)

        # Replace the paragraph with its options and step past them.
        blocks[pos:pos + 1] = entries
        pos += len(entries)
    return blocks
191
191
192
192
_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):( +)(.*)')
def findfieldlists(blocks):
    """Finds field lists.

    A paragraph whose first line matches _fieldre (":name:" followed
    by one or more spaces and the field body) is split into one
    'field' block per field. Every matching line opens a new field;
    non-matching lines are appended unchanged to the field opened
    most recently.

    In a matching line the two colons around the name are dropped and
    two spaces are inserted after the name instead, so the field body
    stays in its original column. That column is recorded in the
    'width' field. Each field block inherits the 'indent' of the
    source paragraph.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first. The list is modified in
    place and also returned.
    """
    # Stand-in for the two colons removed from ":name:".
    padding = ' ' * 2
    i = 0
    while i < len(blocks):
        # Searching for a paragraph that looks like this:
        #
        #
        # +--------------------+----------------------+
        # | ":" field name ":" | field body           |
        # +-------+------------+                      |
        #         | (body elements)+                  |
        #         +-----------------------------------+
        if (blocks[i]['type'] == 'paragraph' and
            _fieldre.match(blocks[i]['lines'][0])):
            indent = blocks[i]['indent']
            fields = []
            for line in blocks[i]['lines']:
                m = _fieldre.match(line)
                if m:
                    key, spaces, rest = m.groups()
                    # Column at which the field body starts; it must
                    # agree with the rewritten line built below.
                    width = len(padding) + len(key) + len(spaces)
                    fields.append(dict(type='field', lines=[],
                                       indent=indent, width=width))
                    # Turn ":foo: bar" into "foo   bar": same width,
                    # colons replaced by padding after the name.
                    line = key + padding + spaces + rest
                fields[-1]['lines'].append(line)
            blocks[i:i+1] = fields
            i += len(fields) - 1
        i += 1
    return blocks
228
228
229
229
def finddefinitionlists(blocks):
    """Split definition-list paragraphs into 'definition' blocks.

    A paragraph with at least two lines, whose first line does not
    start with a space and whose second line does, is treated as a
    definition list. Every line that does not start with a space is a
    term and opens a new definition block; the indented lines that
    follow are appended to it. All lines are stored unchanged.

    Each definition block inherits the 'indent' of the source
    paragraph and gets a 'hang' field holding the amount of leading
    whitespace of the last line added to it.

    The blocks must already carry a 'type' field, i.e., they should
    have been run through findliteralblocks first. The list is
    modified in place and also returned.
    """
    pos = 0
    while pos < len(blocks):
        current = blocks[pos]
        rows = current['lines']
        isdeflist = (current['type'] == 'paragraph' and
                     len(rows) > 1 and
                     not rows[0].startswith(' ') and
                     rows[1].startswith(' '))
        if not isdeflist:
            pos += 1
            continue

        entries = []
        for row in rows:
            if not row.startswith(' '):
                entries.append({'type': 'definition', 'lines': [],
                                'indent': current['indent']})
            entry = entries[-1]
            entry['lines'].append(row)
            # Overwritten on every line, so the last line wins.
            entry['hang'] = len(row) - len(row.lstrip())

        # Replace the paragraph with its definitions and step past them.
        blocks[pos:pos + 1] = entries
        pos += len(entries)
    return blocks
261
261
262
262
def inlineliterals(blocks):
    """Replace inline-literal markers in paragraphs with double quotes.

    In every block of type 'paragraph', each occurrence of two
    consecutive backticks is replaced by a single '"' character.
    Blocks of any other type are left untouched.

    The blocks must already carry a 'type' field. They are updated in
    place and the same list is returned.
    """
    paragraphs = (blk for blk in blocks if blk['type'] == 'paragraph')
    for blk in paragraphs:
        blk['lines'] = [text.replace('``', '"') for text in blk['lines']]
    return blocks
268
269
def addmargins(blocks):
    """Insert empty 'margin' blocks for vertical spacing.

    Adjacent blocks of the same type are kept together without a
    margin when that type is 'bullet', 'option', 'field' or
    'definition'. Between every other pair of neighbouring blocks a
    block of type 'margin' (one empty line, indent 0) is inserted.

    The list is modified in place and also returned.
    """
    compact = ('bullet', 'option', 'field', 'definition')
    pos = 1
    while pos < len(blocks):
        kind = blocks[pos]['type']
        grouped = kind == blocks[pos - 1]['type'] and kind in compact
        if not grouped:
            blocks.insert(pos, {'lines': [''], 'indent': 0,
                                'type': 'margin'})
            # Skip over the margin that was just inserted.
            pos += 1
        pos += 1
    return blocks
278
285
279
286
def formatblock(block, width):
    """Format a block according to width.

    block is a dictionary with 'type', 'indent' and 'lines' fields;
    'definition' blocks also need 'hang', and 'option' and 'field'
    blocks also need 'width'. A width of zero or less selects the
    default of 78 columns.

    Returns the formatted text of the block as a single string:

    - 'margin' blocks give the empty string
    - 'literal' and 'section' blocks are emitted line by line without
      re-wrapping (literal blocks get one extra space of indentation)
    - 'definition' blocks give the term line followed by the wrapped
      definition, indented by 'hang' additional spaces
    - all other blocks are joined into one paragraph and wrapped;
      bullets get a "- " marker with continuation lines aligned under
      the item text, and options/fields get continuation lines
      indented by the block's 'width'
    """
    if width <= 0:
        width = 78
    indent = ' ' * block['indent']
    if block['type'] == 'margin':
        return ''
    elif block['type'] == 'literal':
        indent += ' '
        return indent + ('\n' + indent).join(block['lines'])
    elif block['type'] == 'section':
        return indent + ('\n' + indent).join(block['lines'])
    elif block['type'] == 'definition':
        term = indent + block['lines'][0]
        defindent = indent + block['hang'] * ' '
        # Call strip() on each line instead of str.strip so that
        # unicode lines work as well.
        text = ' '.join([l.strip() for l in block['lines'][1:]])
        return "%s\n%s" % (term, textwrap.fill(text, width=width,
                                               initial_indent=defindent,
                                               subsequent_indent=defindent))
    else:
        initindent = subindent = indent
        text = ' '.join([l.strip() for l in block['lines']])
        if block['type'] == 'bullet':
            marker = '- '
            initindent = indent + marker
            # Hanging indent: continuation lines must line up with the
            # text that follows the marker on the first line.
            subindent = indent + ' ' * len(marker)
        elif block['type'] in ('option', 'field'):
            subindent = indent + block['width'] * ' '

        return textwrap.fill(text, width=width,
                             initial_indent=initindent,
                             subsequent_indent=subindent)
311
318
312
319
def format(text, width, indent=0):
    """Parse text as simplified reStructuredText and format it.

    The text is split into blocks, every block's indentation is
    increased by indent, and the blocks are then run through the
    parsing passes in a fixed order before each one is rendered with
    formatblock at the given width. Returns the rendered blocks joined
    by newlines.
    """
    blocks = findblocks(text)
    for block in blocks:
        block['indent'] += indent

    # Order matters: findliteralblocks assigns the 'type' field that
    # all later passes rely on.
    passes = (findliteralblocks,
              inlineliterals,
              findsections,
              findbulletlists,
              findoptionlists,
              findfieldlists,
              finddefinitionlists,
              addmargins)
    for step in passes:
        blocks = step(blocks)

    rendered = [formatblock(block, width) for block in blocks]
    return '\n'.join(rendered)
326
334
327
335
if __name__ == "__main__":
    # Debugging aid: run the file named on the command line through
    # every parsing pass, dumping the block list after each one, and
    # finally print the text formatted to a width of 30 columns.
    from pprint import pprint

    def debug(func, blocks):
        """Apply func to blocks, pretty-print and return the result."""
        blocks = func(blocks)
        # Single-argument parenthesised prints behave the same as a
        # print statement and as a print() call.
        print("*** after %s:" % func.__name__)
        pprint(blocks)
        print("")
        return blocks

    # Close the input file explicitly instead of leaking the handle.
    fp = open(sys.argv[1])
    try:
        text = fp.read()
    finally:
        fp.close()

    # Same sequence of passes as format(), including inlineliterals.
    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    blocks = debug(inlineliterals, blocks)
    blocks = debug(findsections, blocks)
    blocks = debug(findbulletlists, blocks)
    blocks = debug(findoptionlists, blocks)
    blocks = debug(findfieldlists, blocks)
    blocks = debug(finddefinitionlists, blocks)
    blocks = debug(addmargins, blocks)
    print('\n'.join(formatblock(b, 30) for b in blocks))
General Comments 0
You need to be logged in to leave comments. Login now