##// END OF EJS Templates
minirst: prepare for general types of bullet lists...
Martin Geisler -
r9738:f52c4f7a default
parent child Browse files
Show More
@@ -1,272 +1,275 b''
1 1 # minirst.py - minimal reStructuredText parser
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2, incorporated herein by reference.
7 7
8 8 """simplified reStructuredText parser.
9 9
10 10 This parser knows just enough about reStructuredText to parse the
11 11 Mercurial docstrings.
12 12
13 13 It cheats in a major way: nested blocks are not really nested. They
14 14 are just indented blocks that look like they are nested. This relies
15 15 on the user to keep the right indentation for the blocks.
16 16
17 17 It only supports a small subset of reStructuredText:
18 18
19 19 - paragraphs
20 20
21 21 - definition lists (must use ' ' to indent definitions)
22 22
23 23 - lists (items must start with '-')
24 24
25 25 - field lists (colons cannot be escaped)
26 26
27 27 - literal blocks
28 28
29 29 - option lists (supports only long options without arguments)
30 30
31 31 - inline markup is not recognized at all.
32 32 """
33 33
34 34 import re, sys, textwrap
35 35
36 36
def findblocks(text):
    """Find continuous blocks of lines in text.

    Lines that are empty or contain only whitespace act as separators;
    each run of non-blank lines between separators becomes one block.

    Returns a list of dictionaries representing the blocks. Each block
    has an 'indent' field (the smallest leading-whitespace width among
    its lines) and a 'lines' field (the lines with that many leading
    characters removed). An empty or all-blank text yields an empty
    list.
    """
    blocks = [[]]
    for line in text.splitlines():
        if line.strip():
            blocks[-1].append(line)
        elif blocks[-1]:
            # A blank line closes the current block; consecutive blank
            # lines do not open additional empty blocks.
            blocks.append([])
    if not blocks[-1]:
        # Drop the empty block left behind by trailing blank lines (or
        # by an empty text).
        del blocks[-1]

    for i, block in enumerate(blocks):
        indent = min((len(l) - len(l.lstrip())) for l in block)
        blocks[i] = dict(indent=indent, lines=[l[indent:] for l in block])
    return blocks
57 57
58 58
def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'.

    The list is modified in place (a paragraph consisting only of "::"
    is removed, and the trailing "::" marker is rewritten on other
    paragraphs) and is also returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        block = blocks[i]
        block['type'] = 'paragraph'
        if block['lines'][-1].endswith('::') and i + 1 < len(blocks):
            indent = block['indent']
            adjustment = blocks[i + 1]['indent'] - indent

            if block['lines'] == ['::']:
                # Expanded form: remove block. After the deletion,
                # index i no longer refers to the "::" paragraph (and
                # would wrap around to the last block when i == 0), so
                # no bullet correction is applied in this case.
                del blocks[i]
                i -= 1
            else:
                lastline = block['lines'][-1]
                if lastline.endswith(' ::'):
                    # Partially minimized form: remove space and both
                    # colons.
                    block['lines'][-1] = lastline[:-3]
                else:
                    # Fully minimized form: remove just one colon.
                    block['lines'][-1] = lastline[:-1]

                # List items are formatted with a hanging indent. We
                # must correct for this here while we still have the
                # original information on the indentation of the
                # subsequent literal blocks available.
                m = _bulletre.match(block['lines'][0])
                if m:
                    indent += m.end()
                    adjustment -= m.end()

            # Mark the following indented blocks.
            while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                blocks[i + 1]['type'] = 'literal'
                blocks[i + 1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks

# Bullet list item: a line starting with "- ".
_bulletre = re.compile(r'- ')
# Option list item: a long option, an optional argument introduced by a
# space or "=", one or more spaces, then the description.
# NOTE(review): confirm the number of spaces before "+" against
# upstream; runs of spaces may have been collapsed in the text this was
# recovered from.
_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
# Field list item: ":key:" (the key contains no colon and neither starts
# nor ends with a space), one or more spaces, then the field body.
_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):( +)(.*)')
# Definition list term: any line that starts with a non-space character.
_definitionre = re.compile(r'[^ ]')
113 114
def splitparagraphs(blocks):
    """Split paragraphs into lists.

    Every block of type 'paragraph' whose first line looks like a list
    item is replaced, in place, by one block per item. The new blocks
    get the list type ('bullet', 'option', 'field' or 'definition') and
    inherit the indent of the paragraph they came from. Blocks of other
    types are left untouched. The modified list is returned.
    """
    # Tuples with (list type, item regexp, single line items?). Order
    # matters: definition lists has the least specific regexp and must
    # come last.
    listtypes = [('bullet', _bulletre, True),
                 ('option', _optionre, True),
                 ('field', _fieldre, True),
                 ('definition', _definitionre, False)]

    def match(lines, i, itemre, singleline):
        """Does itemre match an item at line i?

        A list item can be followed by an indented line or another list
        item (but only if singleline is True).
        """
        line1 = lines[i]
        line2 = ''
        if i + 1 < len(lines):
            line2 = lines[i + 1]
        if not itemre.match(line1):
            return False
        if singleline:
            return line2 == '' or line2[0] == ' ' or itemre.match(line2)
        else:
            return line2.startswith(' ')

    i = 0
    while i < len(blocks):
        if blocks[i]['type'] == 'paragraph':
            lines = blocks[i]['lines']
            for listtype, itemre, singleline in listtypes:
                if match(lines, 0, itemre, singleline):
                    items = []
                    for j, line in enumerate(lines):
                        # A line that starts an item opens a new block;
                        # any other line continues the current item.
                        if match(lines, j, itemre, singleline):
                            items.append(dict(type=listtype, lines=[],
                                              indent=blocks[i]['indent']))
                        items[-1]['lines'].append(line)
                    blocks[i:i + 1] = items
                    break
        i += 1
    return blocks
155 156
156 157
def findsections(blocks):
    """Finds sections.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A two-line paragraph whose second line consists solely of dashes
    and is exactly as long as the first line has its type changed to
    'section'. The blocks are updated in place and the list is
    returned.
    """
    for block in blocks:
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | Section title                |
        # | -------------                |
        # +------------------------------+
        if block['type'] != 'paragraph':
            continue
        lines = block['lines']
        if len(lines) == 2 and lines[1] == '-' * len(lines[0]):
            block['type'] = 'section'
    return blocks
175 176
176 177
def inlineliterals(blocks):
    """Replace inline literal markers in paragraphs.

    In every block of type 'paragraph', each occurrence of two
    backquotes is replaced by a double quote character. Blocks of other
    types are left untouched. The blocks are updated in place and the
    list is returned.
    """
    for b in blocks:
        if b['type'] == 'paragraph':
            b['lines'] = [l.replace('``', '"') for l in b['lines']]
    return blocks
182 183
183 184
def addmargins(blocks):
    """Adds empty blocks for vertical spacing.

    This groups bullets, options, fields, and definitions together with
    no vertical space between consecutive blocks of the same type, and
    adds an empty block of type 'margin' between all other blocks.

    The list is modified in place and returned.
    """
    grouped = ('bullet', 'option', 'field', 'definition')
    i = 1
    while i < len(blocks):
        if (blocks[i]['type'] == blocks[i - 1]['type'] and
            blocks[i]['type'] in grouped):
            i += 1
        else:
            blocks.insert(i, dict(lines=[''], indent=0, type='margin'))
            # Skip both the margin just inserted and the block that
            # followed it.
            i += 2
    return blocks
199 200
200 201
def formatblock(block, width):
    """Format a block according to width.

    block is a dictionary with 'type', 'indent' and 'lines' fields and
    width is the maximum line width; a width of zero or less selects 78.
    Returns the formatted text as a single string. The block itself is
    not modified.

    - 'margin' blocks format as the empty string.
    - 'literal' and 'section' blocks are indented but never re-wrapped.
    - 'definition' blocks keep the term on its own line and wrap the
      remaining lines using the indentation of the last line.
    - all other blocks are joined and wrapped; list items ('bullet',
      'field', 'option') get a hanging indent on continuation lines.
    """
    if width <= 0:
        width = 78
    indent = ' ' * block['indent']
    if block['type'] == 'margin':
        return ''
    if block['type'] == 'literal':
        # NOTE(review): literal blocks get one extra space here; confirm
        # the width against upstream, since runs of spaces may have been
        # collapsed in the text this was recovered from.
        indent += ' '
        return indent + ('\n' + indent).join(block['lines'])
    if block['type'] == 'section':
        return indent + ('\n' + indent).join(block['lines'])
    if block['type'] == 'definition':
        term = indent + block['lines'][0]
        # The definition is indented as far as its last line is.
        lastline = block['lines'][-1]
        hang = len(lastline) - len(lastline.lstrip())
        defindent = indent + hang * ' '
        text = ' '.join([l.strip() for l in block['lines'][1:]])
        return "%s\n%s" % (term, textwrap.fill(text, width=width,
                                               initial_indent=defindent,
                                               subsequent_indent=defindent))

    # Work on a copy so that rewriting a field's first line below does
    # not alter the caller's block.
    lines = list(block['lines'])
    initindent = subindent = indent
    if block['type'] == 'bullet':
        m = _bulletre.match(lines[0])
        if m:
            subindent = indent + m.end() * ' '
    elif block['type'] == 'field':
        m = _fieldre.match(lines[0])
        if m:
            key, spaces, rest = m.groups()
            # Turn ":foo: bar" into "foo   bar". The two colons are
            # replaced by two spaces so that the field body stays in
            # the column that subindent is computed for.
            lines[0] = '%s  %s%s' % (key, spaces, rest)
            subindent = indent + (2 + len(key) + len(spaces)) * ' '
    elif block['type'] == 'option':
        m = _optionre.match(lines[0])
        if m:
            option, arg, rest = m.groups()
            subindent = indent + (len(option) + len(arg)) * ' '

    text = ' '.join([l.strip() for l in lines])
    return textwrap.fill(text, width=width,
                         initial_indent=initindent,
                         subsequent_indent=subindent)
241 244
242 245
def format(text, width, indent=0):
    """Parse and format the text according to width.

    text is split into blocks, indent is added to the indentation of
    every block, and the blocks are then classified (literal blocks,
    inline literals, lists, sections), separated by margins and
    formatted one by one with formatblock. Returns the formatted blocks
    joined by newlines.
    """
    blocks = findblocks(text)
    for b in blocks:
        b['indent'] += indent
    # The order of the passes matters: findliteralblocks adds the
    # 'type' field that all the later passes inspect.
    for step in (findliteralblocks, inlineliterals, splitparagraphs,
                 findsections, addmargins):
        blocks = step(blocks)
    return '\n'.join(formatblock(b, width) for b in blocks)
254 257
255 258
if __name__ == "__main__":
    # Debugging aid: run the file named on the command line through
    # each pass, dumping the block list after every step, and finally
    # print the text formatted to a width of 30 columns.
    from pprint import pprint

    def debug(func, blocks):
        """Apply func to blocks, dump the result and return it."""
        blocks = func(blocks)
        print("*** after %s:" % func.__name__)
        pprint(blocks)
        print("")
        return blocks

    fp = open(sys.argv[1])
    try:
        text = fp.read()
    finally:
        # Close the file even if reading fails.
        fp.close()
    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    # Same sequence of passes as format().
    blocks = debug(inlineliterals, blocks)
    blocks = debug(splitparagraphs, blocks)
    blocks = debug(findsections, blocks)
    blocks = debug(addmargins, blocks)
    print('\n'.join(formatblock(b, 30) for b in blocks))
General Comments 0
You need to be logged in to leave comments. Login now