##// END OF EJS Templates
minirst: prepare for general types of bullet lists...
Martin Geisler -
r9738:f52c4f7a default
parent child Browse files
Show More
@@ -1,272 +1,275 b''
1 # minirst.py - minimal reStructuredText parser
1 # minirst.py - minimal reStructuredText parser
2 #
2 #
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
6 # GNU General Public License version 2, incorporated herein by reference.
7
7
8 """simplified reStructuredText parser.
8 """simplified reStructuredText parser.
9
9
10 This parser knows just enough about reStructuredText to parse the
10 This parser knows just enough about reStructuredText to parse the
11 Mercurial docstrings.
11 Mercurial docstrings.
12
12
13 It cheats in a major way: nested blocks are not really nested. They
13 It cheats in a major way: nested blocks are not really nested. They
14 are just indented blocks that look like they are nested. This relies
14 are just indented blocks that look like they are nested. This relies
15 on the user to keep the right indentation for the blocks.
15 on the user to keep the right indentation for the blocks.
16
16
17 It only supports a small subset of reStructuredText:
17 It only supports a small subset of reStructuredText:
18
18
19 - paragraphs
19 - paragraphs
20
20
21 - definition lists (must use ' ' to indent definitions)
21 - definition lists (must use ' ' to indent definitions)
22
22
23 - lists (items must start with '-')
23 - lists (items must start with '-')
24
24
25 - field lists (colons cannot be escaped)
25 - field lists (colons cannot be escaped)
26
26
27 - literal blocks
27 - literal blocks
28
28
29 - option lists (supports only long options without arguments)
29 - option lists (supports only long options without arguments)
30
30
31 - inline markup is not recognized at all.
31 - inline markup is not recognized at all.
32 """
32 """
33
33
34 import re, sys, textwrap
34 import re, sys, textwrap
35
35
36
36
def findblocks(text):
    """Find continuous blocks of lines in text.

    Blocks are runs of non-blank lines separated by one or more blank
    (empty or whitespace-only) lines.

    Returns a list of dictionaries representing the blocks. Each block
    has an 'indent' field (the smallest leading-whitespace width among
    its lines) and a 'lines' field (the lines with that common indent
    removed). Empty or all-blank text yields an empty list.
    """
    # Start with one open (empty) group; a blank line closes the
    # current group only if it already holds something, so runs of
    # blank lines never create empty groups in the middle.
    groups = [[]]
    for line in text.splitlines():
        if line.strip():
            groups[-1].append(line)
        elif groups[-1]:
            groups.append([])
    # Trailing blank lines (or empty input) leave an empty group behind.
    if not groups[-1]:
        del groups[-1]

    blocks = []
    for group in groups:
        indent = min(len(l) - len(l.lstrip()) for l in group)
        blocks.append(dict(indent=indent, lines=[l[indent:] for l in group]))
    return blocks
57
57
58
58
def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'. The list is modified in place (a
    paragraph consisting only of '::' is removed) and also returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i+1 < len(blocks):
            indent = blocks[i]['indent']
            adjustment = blocks[i+1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block
                del blocks[i]
                i -= 1
            elif blocks[i]['lines'][-1].endswith(' ::'):
                # Partially minimized form: remove space and both
                # colons.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
            else:
                # Fully minimized form: remove just one colon.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            #
            # If the removed '::' block was the very first block, i is
            # now -1 and blocks[i] would silently wrap around to the
            # *last* block; there is no preceding block to inspect in
            # that case, so skip the bullet check.
            m = None
            if i >= 0:
                m = _bulletre.match(blocks[i]['lines'][0])
            if m:
                indent += m.end()
                adjustment -= m.end()

            # Mark the following indented blocks.
            while i+1 < len(blocks) and blocks[i+1]['indent'] > indent:
                blocks[i+1]['type'] = 'literal'
                blocks[i+1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks
108
109
109 _bulletre = re.compile(r'- ')
110 _bulletre = re.compile(r'- ')
110 _optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
111 _optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
111 _fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):( +)(.*)')
112 _fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):( +)(.*)')
112 _definitionre = re.compile(r'[^ ]')
113 _definitionre = re.compile(r'[^ ]')
113
114
def splitparagraphs(blocks):
    """Split paragraphs into lists.

    Every block of type 'paragraph' whose first line starts a list item
    is replaced, in place, by one block per item. Each new block gets
    the list type as its 'type', the paragraph's 'indent', and the
    item's first line plus any continuation lines as its 'lines'.
    Paragraphs that match no list type, and blocks of other types, are
    left unchanged. Returns the same list object.
    """
    # Tuples with (list type, item regexp, single line items?). Order
    # matters: definition lists has the least specific regexp and must
    # come last.
    listtypes = [('bullet', _bulletre, True),
                 ('option', _optionre, True),
                 ('field', _fieldre, True),
                 ('definition', _definitionre, False)]

    def match(lines, i, itemre, singleline):
        """Does itemre match an item at line i?

        A list item can be followed by an indented line or another list
        item (but only if singleline is True).
        """
        if not itemre.match(lines[i]):
            return False
        # Treat "no following line" as an empty following line.
        if i + 1 < len(lines):
            following = lines[i + 1]
        else:
            following = ''
        if singleline:
            return (following == '' or following[0] == ' '
                    or itemre.match(following))
        # Multi-line items (definitions) require an indented body line.
        return following.startswith(' ')

    i = 0
    while i < len(blocks):
        if blocks[i]['type'] == 'paragraph':
            lines = blocks[i]['lines']
            for listtype, itemre, singleline in listtypes:
                if not match(lines, 0, itemre, singleline):
                    continue
                # The first line is known to start an item, so
                # items[-1] always exists when a continuation line is
                # appended below.
                items = []
                for j, line in enumerate(lines):
                    if match(lines, j, itemre, singleline):
                        items.append(dict(type=listtype, lines=[],
                                          indent=blocks[i]['indent']))
                    items[-1]['lines'].append(line)
                blocks[i:i+1] = items
                break
        i += 1
    return blocks
155
156
156
157
def findsections(blocks):
    """Finds sections.

    A paragraph of exactly two lines whose second line consists of
    dashes and is as long as the first line has its type changed to
    'section'. Blocks are modified in place and the list is returned.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    for block in blocks:
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | Section title                |
        # | -------------                |
        # +------------------------------+
        if block['type'] != 'paragraph':
            continue
        lines = block['lines']
        if len(lines) == 2 and lines[1] == '-' * len(lines[0]):
            block['type'] = 'section'
    return blocks
175
176
176
177
def inlineliterals(blocks):
    """Replace inline-literal markers in paragraphs with double quotes.

    In every block of type 'paragraph', each occurrence of two
    backticks is replaced by a single '"'. Other block types are left
    alone. Blocks are modified in place and the list is returned.
    """
    for block in blocks:
        if block['type'] != 'paragraph':
            continue
        block['lines'] = [line.replace('``', '"') for line in block['lines']]
    return blocks
182
183
183
184
def addmargins(blocks):
    """Adds empty blocks for vertical spacing.

    This groups bullets, options, fields, and definitions together with
    no vertical space between consecutive blocks of the same type, and
    adds an empty 'margin' block between all other blocks. The list is
    modified in place and returned.
    """
    grouped = ('bullet', 'option', 'field', 'definition')
    i = 1
    while i < len(blocks):
        current = blocks[i]['type']
        if current == blocks[i-1]['type'] and current in grouped:
            i += 1
        else:
            blocks.insert(i, dict(lines=[''], indent=0, type='margin'))
            # Skip over both the new margin and the block after it.
            i += 2
    return blocks
199
200
200
201
def formatblock(block, width):
    """Format a block according to width.

    block is a dictionary with 'type', 'indent' and 'lines' fields;
    width is the maximum line width, where a value <= 0 selects 78.
    Returns the formatted text as a single string. Margin blocks give
    an empty string; literal and section blocks are emitted line by
    line without re-wrapping; all other blocks are re-wrapped with
    textwrap.fill. For field blocks the first entry of block['lines']
    is rewritten in place.
    """
    if width <= 0:
        width = 78
    indent = ' ' * block['indent']
    kind = block['type']
    lines = block['lines']

    if kind == 'margin':
        return ''
    if kind == 'literal':
        # NOTE(review): width of this extra indent is as recovered from
        # the rendered diff, which may have collapsed spaces -- confirm.
        indent += ' '
        return indent + ('\n' + indent).join(lines)
    if kind == 'section':
        return indent + ('\n' + indent).join(lines)
    if kind == 'definition':
        term = indent + lines[0]
        # The definition body keeps the hanging indent of its last line.
        hang = len(lines[-1]) - len(lines[-1].lstrip())
        defindent = indent + hang * ' '
        text = ' '.join([l.strip() for l in lines[1:]])
        return "%s\n%s" % (term, textwrap.fill(text, width=width,
                                               initial_indent=defindent,
                                               subsequent_indent=defindent))

    initindent = subindent = indent
    if kind == 'bullet':
        m = _bulletre.match(lines[0])
        if m:
            # Continuation lines line up with the text after the bullet.
            subindent = indent + m.end() * ' '
    elif kind == 'field':
        m = _fieldre.match(lines[0])
        if m:
            key, spaces, rest = m.groups()
            # Turn ":foo: bar" into "foo   bar": the two colons are
            # replaced by two spaces after the key so that the body
            # keeps its column and matches the hanging indent computed
            # below (2 + len(key) + len(spaces)).
            lines[0] = '%s  %s%s' % (key, spaces, rest)
            subindent = indent + (2 + len(key) + len(spaces)) * ' '
    elif kind == 'option':
        m = _optionre.match(lines[0])
        if m:
            option, arg, rest = m.groups()
            subindent = indent + (len(option) + len(arg)) * ' '

    text = ' '.join([l.strip() for l in lines])
    return textwrap.fill(text, width=width,
                         initial_indent=initindent,
                         subsequent_indent=subindent)
241
244
242
245
def format(text, width, indent=0):
    """Parse and format the text according to width.

    text is split into blocks, each block's indentation is increased by
    indent, the blocks are classified (literal blocks, inline literals,
    lists, sections), margins are inserted, and the formatted blocks
    are joined with newlines into the returned string.
    """
    blocks = findblocks(text)
    for b in blocks:
        b['indent'] += indent
    # The passes run in this fixed order; later passes read the 'type'
    # field set by findliteralblocks.
    for step in (findliteralblocks, inlineliterals, splitparagraphs,
                 findsections, addmargins):
        blocks = step(blocks)
    return '\n'.join([formatblock(b, width) for b in blocks])
254
257
255
258
if __name__ == "__main__":
    # Debugging aid: run each parsing pass on the file named by the
    # first command line argument, dump the intermediate block lists,
    # and finally print the text formatted to a width of 30 columns.
    from pprint import pprint

    def debug(func, blocks):
        """Apply func to blocks, pretty-print the result, and return it."""
        blocks = func(blocks)
        # Single-argument print(...) behaves the same whether print is
        # a statement or a function.
        print("*** after %s:" % func.__name__)
        pprint(blocks)
        print("")
        return blocks

    fp = open(sys.argv[1])
    try:
        text = fp.read()
    finally:
        # Do not leave the input file open if reading fails.
        fp.close()
    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    # Same pass order as format(), which also runs inlineliterals here.
    blocks = debug(inlineliterals, blocks)
    blocks = debug(splitparagraphs, blocks)
    blocks = debug(findsections, blocks)
    blocks = debug(addmargins, blocks)
    print('\n'.join(formatblock(b, 30) for b in blocks))
General Comments 0
You need to be logged in to leave comments. Login now