##// END OF EJS Templates
minirst: update module docstring
Martin Geisler -
r9741:245689e7 default
parent child Browse files
Show More
@@ -1,275 +1,279 b''
1 # minirst.py - minimal reStructuredText parser
1 # minirst.py - minimal reStructuredText parser
2 #
2 #
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
6 # GNU General Public License version 2, incorporated herein by reference.
7
7
8 """simplified reStructuredText parser.
8 """simplified reStructuredText parser.
9
9
10 This parser knows just enough about reStructuredText to parse the
10 This parser knows just enough about reStructuredText to parse the
11 Mercurial docstrings.
11 Mercurial docstrings.
12
12
13 It cheats in a major way: nested blocks are not really nested. They
13 It cheats in a major way: nested blocks are not really nested. They
14 are just indented blocks that look like they are nested. This relies
14 are just indented blocks that look like they are nested. This relies
15 on the user to keep the right indentation for the blocks.
15 on the user to keep the right indentation for the blocks.
16
16
17 It only supports a small subset of reStructuredText:
17 It only supports a small subset of reStructuredText:
18
18
19 - sections
20
19 - paragraphs
21 - paragraphs
20
22
21 - definition lists (must use ' ' to indent definitions)
23 - literal blocks
24
25 - definition lists
22
26
23 - lists (items must start with '-')
27 - bullet lists (items must start with '-')
28
29 - enumerated lists (no autonumbering)
24
30
25 - field lists (colons cannot be escaped)
31 - field lists (colons cannot be escaped)
26
32
27 - literal blocks
28
29 - option lists (supports only long options without arguments)
33 - option lists (supports only long options without arguments)
30
34
31 - inline markup is not recognized at all.
35 - inline literals (no other inline markup is recognized)
32 """
36 """
33
37
34 import re, sys, textwrap
38 import re, sys, textwrap
35
39
36
40
37 def findblocks(text):
41 def findblocks(text):
38 """Find continuous blocks of lines in text.
42 """Find continuous blocks of lines in text.
39
43
40 Returns a list of dictionaries representing the blocks. Each block
44 Returns a list of dictionaries representing the blocks. Each block
41 has an 'indent' field and a 'lines' field.
45 has an 'indent' field and a 'lines' field.
42 """
46 """
43 blocks = [[]]
47 blocks = [[]]
44 lines = text.splitlines()
48 lines = text.splitlines()
45 for line in lines:
49 for line in lines:
46 if line.strip():
50 if line.strip():
47 blocks[-1].append(line)
51 blocks[-1].append(line)
48 elif blocks[-1]:
52 elif blocks[-1]:
49 blocks.append([])
53 blocks.append([])
50 if not blocks[-1]:
54 if not blocks[-1]:
51 del blocks[-1]
55 del blocks[-1]
52
56
53 for i, block in enumerate(blocks):
57 for i, block in enumerate(blocks):
54 indent = min((len(l) - len(l.lstrip())) for l in block)
58 indent = min((len(l) - len(l.lstrip())) for l in block)
55 blocks[i] = dict(indent=indent, lines=[l[indent:] for l in block])
59 blocks[i] = dict(indent=indent, lines=[l[indent:] for l in block])
56 return blocks
60 return blocks
57
61
58
62
59 def findliteralblocks(blocks):
63 def findliteralblocks(blocks):
60 """Finds literal blocks and adds a 'type' field to the blocks.
64 """Finds literal blocks and adds a 'type' field to the blocks.
61
65
62 Literal blocks are given the type 'literal', all other blocks are
66 Literal blocks are given the type 'literal', all other blocks are
63 given the type 'paragraph'.
67 given the type 'paragraph'.
64 """
68 """
65 i = 0
69 i = 0
66 while i < len(blocks):
70 while i < len(blocks):
67 # Searching for a block that looks like this:
71 # Searching for a block that looks like this:
68 #
72 #
69 # +------------------------------+
73 # +------------------------------+
70 # | paragraph |
74 # | paragraph |
71 # | (ends with "::") |
75 # | (ends with "::") |
72 # +------------------------------+
76 # +------------------------------+
73 # +---------------------------+
77 # +---------------------------+
74 # | indented literal block |
78 # | indented literal block |
75 # +---------------------------+
79 # +---------------------------+
76 blocks[i]['type'] = 'paragraph'
80 blocks[i]['type'] = 'paragraph'
77 if blocks[i]['lines'][-1].endswith('::') and i+1 < len(blocks):
81 if blocks[i]['lines'][-1].endswith('::') and i+1 < len(blocks):
78 indent = blocks[i]['indent']
82 indent = blocks[i]['indent']
79 adjustment = blocks[i+1]['indent'] - indent
83 adjustment = blocks[i+1]['indent'] - indent
80
84
81 if blocks[i]['lines'] == ['::']:
85 if blocks[i]['lines'] == ['::']:
82 # Expanded form: remove block
86 # Expanded form: remove block
83 del blocks[i]
87 del blocks[i]
84 i -= 1
88 i -= 1
85 elif blocks[i]['lines'][-1].endswith(' ::'):
89 elif blocks[i]['lines'][-1].endswith(' ::'):
86 # Partially minimized form: remove space and both
90 # Partially minimized form: remove space and both
87 # colons.
91 # colons.
88 blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
92 blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
89 else:
93 else:
90 # Fully minimized form: remove just one colon.
94 # Fully minimized form: remove just one colon.
91 blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]
95 blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]
92
96
93 # List items are formatted with a hanging indent. We must
97 # List items are formatted with a hanging indent. We must
94 # correct for this here while we still have the original
98 # correct for this here while we still have the original
95 # information on the indentation of the subsequent literal
99 # information on the indentation of the subsequent literal
96 # blocks available.
100 # blocks available.
97 m = _bulletre.match(blocks[i]['lines'][0])
101 m = _bulletre.match(blocks[i]['lines'][0])
98 if m:
102 if m:
99 indent += m.end()
103 indent += m.end()
100 adjustment -= m.end()
104 adjustment -= m.end()
101
105
102 # Mark the following indented blocks.
106 # Mark the following indented blocks.
103 while i+1 < len(blocks) and blocks[i+1]['indent'] > indent:
107 while i+1 < len(blocks) and blocks[i+1]['indent'] > indent:
104 blocks[i+1]['type'] = 'literal'
108 blocks[i+1]['type'] = 'literal'
105 blocks[i+1]['indent'] -= adjustment
109 blocks[i+1]['indent'] -= adjustment
106 i += 1
110 i += 1
107 i += 1
111 i += 1
108 return blocks
112 return blocks
109
113
110 _bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)) ')
114 _bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)) ')
111 _optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
115 _optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
112 _fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):( +)(.*)')
116 _fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):( +)(.*)')
113 _definitionre = re.compile(r'[^ ]')
117 _definitionre = re.compile(r'[^ ]')
114
118
115 def splitparagraphs(blocks):
119 def splitparagraphs(blocks):
116 """Split paragraphs into lists."""
120 """Split paragraphs into lists."""
117 # Tuples with (list type, item regexp, single line items?). Order
121 # Tuples with (list type, item regexp, single line items?). Order
118 # matters: definition lists have the least specific regexp and must
122 # matters: definition lists have the least specific regexp and must
119 # come last.
123 # come last.
120 listtypes = [('bullet', _bulletre, True),
124 listtypes = [('bullet', _bulletre, True),
121 ('option', _optionre, True),
125 ('option', _optionre, True),
122 ('field', _fieldre, True),
126 ('field', _fieldre, True),
123 ('definition', _definitionre, False)]
127 ('definition', _definitionre, False)]
124
128
125 def match(lines, i, itemre, singleline):
129 def match(lines, i, itemre, singleline):
126 """Does itemre match an item at line i?
130 """Does itemre match an item at line i?
127
131
128 A list item can be followed by an indented line or another list
132 A list item can be followed by an indented line or another list
129 item (but only if singleline is True).
133 item (but only if singleline is True).
130 """
134 """
131 line1 = lines[i]
135 line1 = lines[i]
132 line2 = i+1 < len(lines) and lines[i+1] or ''
136 line2 = i+1 < len(lines) and lines[i+1] or ''
133 if not itemre.match(line1):
137 if not itemre.match(line1):
134 return False
138 return False
135 if singleline:
139 if singleline:
136 return line2 == '' or line2[0] == ' ' or itemre.match(line2)
140 return line2 == '' or line2[0] == ' ' or itemre.match(line2)
137 else:
141 else:
138 return line2.startswith(' ')
142 return line2.startswith(' ')
139
143
140 i = 0
144 i = 0
141 while i < len(blocks):
145 while i < len(blocks):
142 if blocks[i]['type'] == 'paragraph':
146 if blocks[i]['type'] == 'paragraph':
143 lines = blocks[i]['lines']
147 lines = blocks[i]['lines']
144 for type, itemre, singleline in listtypes:
148 for type, itemre, singleline in listtypes:
145 if match(lines, 0, itemre, singleline):
149 if match(lines, 0, itemre, singleline):
146 items = []
150 items = []
147 for j, line in enumerate(lines):
151 for j, line in enumerate(lines):
148 if match(lines, j, itemre, singleline):
152 if match(lines, j, itemre, singleline):
149 items.append(dict(type=type, lines=[],
153 items.append(dict(type=type, lines=[],
150 indent=blocks[i]['indent']))
154 indent=blocks[i]['indent']))
151 items[-1]['lines'].append(line)
155 items[-1]['lines'].append(line)
152 blocks[i:i+1] = items
156 blocks[i:i+1] = items
153 break
157 break
154 i += 1
158 i += 1
155 return blocks
159 return blocks
156
160
157
161
158 def findsections(blocks):
162 def findsections(blocks):
159 """Finds sections.
163 """Finds sections.
160
164
161 The blocks must have a 'type' field, i.e., they should have been
165 The blocks must have a 'type' field, i.e., they should have been
162 run through findliteralblocks first.
166 run through findliteralblocks first.
163 """
167 """
164 for block in blocks:
168 for block in blocks:
165 # Searching for a block that looks like this:
169 # Searching for a block that looks like this:
166 #
170 #
167 # +------------------------------+
171 # +------------------------------+
168 # | Section title |
172 # | Section title |
169 # | ------------- |
173 # | ------------- |
170 # +------------------------------+
174 # +------------------------------+
171 if (block['type'] == 'paragraph' and
175 if (block['type'] == 'paragraph' and
172 len(block['lines']) == 2 and
176 len(block['lines']) == 2 and
173 block['lines'][1] == '-' * len(block['lines'][0])):
177 block['lines'][1] == '-' * len(block['lines'][0])):
174 block['type'] = 'section'
178 block['type'] = 'section'
175 return blocks
179 return blocks
176
180
177
181
178 def inlineliterals(blocks):
182 def inlineliterals(blocks):
179 for b in blocks:
183 for b in blocks:
180 if b['type'] == 'paragraph':
184 if b['type'] == 'paragraph':
181 b['lines'] = [l.replace('``', '"') for l in b['lines']]
185 b['lines'] = [l.replace('``', '"') for l in b['lines']]
182 return blocks
186 return blocks
183
187
184
188
185 def addmargins(blocks):
189 def addmargins(blocks):
186 """Adds empty blocks for vertical spacing.
190 """Adds empty blocks for vertical spacing.
187
191
188 This groups bullets, options, fields, and definitions together with no vertical
192 This groups bullets, options, fields, and definitions together with no vertical
189 space between them, and adds an empty block between all other blocks.
193 space between them, and adds an empty block between all other blocks.
190 """
194 """
191 i = 1
195 i = 1
192 while i < len(blocks):
196 while i < len(blocks):
193 if (blocks[i]['type'] == blocks[i-1]['type'] and
197 if (blocks[i]['type'] == blocks[i-1]['type'] and
194 blocks[i]['type'] in ('bullet', 'option', 'field', 'definition')):
198 blocks[i]['type'] in ('bullet', 'option', 'field', 'definition')):
195 i += 1
199 i += 1
196 else:
200 else:
197 blocks.insert(i, dict(lines=[''], indent=0, type='margin'))
201 blocks.insert(i, dict(lines=[''], indent=0, type='margin'))
198 i += 2
202 i += 2
199 return blocks
203 return blocks
200
204
201
205
202 def formatblock(block, width):
206 def formatblock(block, width):
203 """Format a block according to width."""
207 """Format a block according to width."""
204 if width <= 0:
208 if width <= 0:
205 width = 78
209 width = 78
206 indent = ' ' * block['indent']
210 indent = ' ' * block['indent']
207 if block['type'] == 'margin':
211 if block['type'] == 'margin':
208 return ''
212 return ''
209 if block['type'] == 'literal':
213 if block['type'] == 'literal':
210 indent += ' '
214 indent += ' '
211 return indent + ('\n' + indent).join(block['lines'])
215 return indent + ('\n' + indent).join(block['lines'])
212 if block['type'] == 'section':
216 if block['type'] == 'section':
213 return indent + ('\n' + indent).join(block['lines'])
217 return indent + ('\n' + indent).join(block['lines'])
214 if block['type'] == 'definition':
218 if block['type'] == 'definition':
215 term = indent + block['lines'][0]
219 term = indent + block['lines'][0]
216 hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())
220 hang = len(block['lines'][-1]) - len(block['lines'][-1].lstrip())
217 defindent = indent + hang * ' '
221 defindent = indent + hang * ' '
218 text = ' '.join(map(str.strip, block['lines'][1:]))
222 text = ' '.join(map(str.strip, block['lines'][1:]))
219 return "%s\n%s" % (term, textwrap.fill(text, width=width,
223 return "%s\n%s" % (term, textwrap.fill(text, width=width,
220 initial_indent=defindent,
224 initial_indent=defindent,
221 subsequent_indent=defindent))
225 subsequent_indent=defindent))
222 initindent = subindent = indent
226 initindent = subindent = indent
223 if block['type'] == 'bullet':
227 if block['type'] == 'bullet':
224 m = _bulletre.match(block['lines'][0])
228 m = _bulletre.match(block['lines'][0])
225 if m:
229 if m:
226 subindent = indent + m.end() * ' '
230 subindent = indent + m.end() * ' '
227 elif block['type'] == 'field':
231 elif block['type'] == 'field':
228 m = _fieldre.match(block['lines'][0])
232 m = _fieldre.match(block['lines'][0])
229 if m:
233 if m:
230 key, spaces, rest = m.groups()
234 key, spaces, rest = m.groups()
231 # Turn ":foo: bar" into "foo bar".
235 # Turn ":foo: bar" into "foo bar".
232 block['lines'][0] = '%s %s%s' % (key, spaces, rest)
236 block['lines'][0] = '%s %s%s' % (key, spaces, rest)
233 subindent = indent + (2 + len(key) + len(spaces)) * ' '
237 subindent = indent + (2 + len(key) + len(spaces)) * ' '
234 elif block['type'] == 'option':
238 elif block['type'] == 'option':
235 m = _optionre.match(block['lines'][0])
239 m = _optionre.match(block['lines'][0])
236 if m:
240 if m:
237 option, arg, rest = m.groups()
241 option, arg, rest = m.groups()
238 subindent = indent + (len(option) + len(arg)) * ' '
242 subindent = indent + (len(option) + len(arg)) * ' '
239
243
240 text = ' '.join(map(str.strip, block['lines']))
244 text = ' '.join(map(str.strip, block['lines']))
241 return textwrap.fill(text, width=width,
245 return textwrap.fill(text, width=width,
242 initial_indent=initindent,
246 initial_indent=initindent,
243 subsequent_indent=subindent)
247 subsequent_indent=subindent)
244
248
245
249
246 def format(text, width, indent=0):
250 def format(text, width, indent=0):
247 """Parse and format the text according to width."""
251 """Parse and format the text according to width."""
248 blocks = findblocks(text)
252 blocks = findblocks(text)
249 for b in blocks:
253 for b in blocks:
250 b['indent'] += indent
254 b['indent'] += indent
251 blocks = findliteralblocks(blocks)
255 blocks = findliteralblocks(blocks)
252 blocks = inlineliterals(blocks)
256 blocks = inlineliterals(blocks)
253 blocks = splitparagraphs(blocks)
257 blocks = splitparagraphs(blocks)
254 blocks = findsections(blocks)
258 blocks = findsections(blocks)
255 blocks = addmargins(blocks)
259 blocks = addmargins(blocks)
256 return '\n'.join(formatblock(b, width) for b in blocks)
260 return '\n'.join(formatblock(b, width) for b in blocks)
257
261
258
262
259 if __name__ == "__main__":
263 if __name__ == "__main__":
260 from pprint import pprint
264 from pprint import pprint
261
265
262 def debug(func, blocks):
266 def debug(func, blocks):
263 blocks = func(blocks)
267 blocks = func(blocks)
264 print "*** after %s:" % func.__name__
268 print "*** after %s:" % func.__name__
265 pprint(blocks)
269 pprint(blocks)
266 print
270 print
267 return blocks
271 return blocks
268
272
269 text = open(sys.argv[1]).read()
273 text = open(sys.argv[1]).read()
270 blocks = debug(findblocks, text)
274 blocks = debug(findblocks, text)
271 blocks = debug(findliteralblocks, blocks)
275 blocks = debug(findliteralblocks, blocks)
272 blocks = debug(splitparagraphs, blocks)
276 blocks = debug(splitparagraphs, blocks)
273 blocks = debug(findsections, blocks)
277 blocks = debug(findsections, blocks)
274 blocks = debug(addmargins, blocks)
278 blocks = debug(addmargins, blocks)
275 print '\n'.join(formatblock(b, 30) for b in blocks)
279 print '\n'.join(formatblock(b, 30) for b in blocks)
General Comments 0
You need to be logged in to leave comments. Login now