##// END OF EJS Templates
minirst: removed unnecessary initindent variable
Martin Geisler -
r10937:a9d5943d default
parent child Browse files
Show More
@@ -1,366 +1,366
1 1 # minirst.py - minimal reStructuredText parser
2 2 #
3 3 # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """simplified reStructuredText parser.
9 9
10 10 This parser knows just enough about reStructuredText to parse the
11 11 Mercurial docstrings.
12 12
13 13 It cheats in a major way: nested blocks are not really nested. They
14 14 are just indented blocks that look like they are nested. This relies
15 15 on the user to keep the right indentation for the blocks.
16 16
17 17 It only supports a small subset of reStructuredText:
18 18
19 19 - sections
20 20
21 21 - paragraphs
22 22
23 23 - literal blocks
24 24
25 25 - definition lists
26 26
27 27 - bullet lists (items must start with '-')
28 28
29 29 - enumerated lists (no autonumbering)
30 30
31 31 - field lists (colons cannot be escaped)
32 32
33 33 - option lists (supports only long options without arguments)
34 34
35 35 - inline literals (no other inline markup is recognized)
36 36 """
37 37
38 38 import re, sys, textwrap
39 39
40 40
def findblocks(text):
    """Split text into blocks of consecutive non-blank lines.

    Blank (whitespace-only) lines separate the blocks and are dropped.
    Returns a list of dictionaries, one per block, each with:

    - 'indent': the smallest leading-whitespace width among its lines
    - 'lines': the lines of the block with that common indent removed
    """
    groups = []
    current = []
    for line in text.splitlines():
        if line.strip():
            current.append(line)
        elif current:
            # A blank line closes the group collected so far.
            groups.append(current)
            current = []
    if current:
        groups.append(current)

    result = []
    for group in groups:
        margin = min([len(l) - len(l.lstrip()) for l in group])
        result.append(dict(indent=margin, lines=[l[margin:] for l in group]))
    return result
61 61
62 62
def findliteralblocks(blocks):
    """Find literal blocks and add a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'. The list is modified in place (a
    paragraph consisting only of '::' is removed, the '::' marker is
    stripped from the introducing paragraph, and the indentation of
    literal blocks is reduced) and is also returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block
                del blocks[i]
                i -= 1
            elif blocks[i]['lines'][-1].endswith(' ::'):
                # Partially minimized form: remove space and both
                # colons.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
            else:
                # Fully minimized form: remove just one colon.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            #
            # When an expanded-form '::' block was the very first block,
            # i is now -1 and there is no preceding block to inspect;
            # indexing blocks[-1] would wrongly look at the *last* block.
            m = None
            if i >= 0:
                m = _bulletre.match(blocks[i]['lines'][0])
            if m:
                indent += m.end()
                adjustment -= m.end()

            # Mark the following indented blocks.
            while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                blocks[i + 1]['type'] = 'literal'
                blocks[i + 1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks

# Regular expressions recognizing the first line of each kind of list
# item; they are applied with .match(), i.e. anchored at the line start.
_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')
_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')
_definitionre = re.compile(r'[^ ]')
118 118
def splitparagraphs(blocks):
    """Split paragraphs that are really lists into one block per item.

    Every block of type 'paragraph' whose first line starts a list
    item is replaced, in place, by a sequence of item blocks of type
    'bullet', 'option', 'field' or 'definition'. Each item block keeps
    the indent of the paragraph it came from. Returns blocks.
    """
    # Tuples with (list type, item regexp, single line items?). Order
    # matters: the definition list regexp is the least specific one and
    # must be tried last.
    candidates = [('bullet', _bulletre, True),
                  ('option', _optionre, True),
                  ('field', _fieldre, True),
                  ('definition', _definitionre, False)]

    def startsitem(lines, pos, itemre, singleline):
        """Does itemre match an item starting at lines[pos]?

        A list item can be followed by an indented line or, but only
        if singleline is True, by nothing or by another list item.
        """
        if not itemre.match(lines[pos]):
            return False
        following = ''
        if pos + 1 < len(lines):
            following = lines[pos + 1]
        if not singleline:
            return following.startswith(' ')
        return (following == '' or following[0] == ' '
                or itemre.match(following))

    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        if block['type'] == 'paragraph':
            lines = block['lines']
            for kind, itemre, singleline in candidates:
                if not startsitem(lines, 0, itemre, singleline):
                    continue
                items = []
                for n, line in enumerate(lines):
                    if startsitem(lines, n, itemre, singleline):
                        # A new item begins on this line.
                        items.append(dict(type=kind, lines=[],
                                          indent=block['indent']))
                    items[-1]['lines'].append(line)
                blocks[pos:pos + 1] = items
                break
        pos += 1
    return blocks
160 160
161 161
162 162 _fieldwidth = 12
163 163
164 164 def updatefieldlists(blocks):
165 165 """Find key and maximum key width for field lists."""
166 166 i = 0
167 167 while i < len(blocks):
168 168 if blocks[i]['type'] != 'field':
169 169 i += 1
170 170 continue
171 171
172 172 keywidth = 0
173 173 j = i
174 174 while j < len(blocks) and blocks[j]['type'] == 'field':
175 175 m = _fieldre.match(blocks[j]['lines'][0])
176 176 key, rest = m.groups()
177 177 blocks[j]['lines'][0] = rest
178 178 blocks[j]['key'] = key
179 179 keywidth = max(keywidth, len(key))
180 180 j += 1
181 181
182 182 for block in blocks[i:j]:
183 183 block['keywidth'] = keywidth
184 184 i = j + 1
185 185
186 186 return blocks
187 187
188 188
def prunecontainers(blocks, keep):
    """Prune unwanted containers.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    Every ".. container:: type" paragraph is removed. If type is
    listed in keep, the more-indented blocks that follow it are kept
    and dedented; otherwise they are deleted and type is recorded.

    Returns a tuple (blocks, pruned) where blocks is the same list,
    modified in place, and pruned lists the container types that were
    removed.
    """
    pruned = []
    i = 0
    while i + 1 < len(blocks):
        # Searching for a block that looks like this:
        #
        # +-------+---------------------------+
        # | ".. container ::" type            |
        # +---+                               |
        #     | blocks                        |
        #     +-------------------------------+
        if (blocks[i]['type'] == 'paragraph' and
            blocks[i]['lines'][0].startswith('.. container::')):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent
            containertype = blocks[i]['lines'][0][15:]
            prune = containertype not in keep
            if prune:
                pruned.append(containertype)

            # Always delete "..container:: type" block
            del blocks[i]
            j = i
            # Whatever now sits at index i has not been examined yet
            # (it may itself be a nested container directive), so step
            # back once to make the "i += 1" below land on it again.
            i -= 1
            while j < len(blocks) and blocks[j]['indent'] > indent:
                if prune:
                    del blocks[j]
                else:
                    blocks[j]['indent'] -= adjustment
                    j += 1
        i += 1
    return blocks, pruned
226 226
227 227
def findsections(blocks):
    """Mark section titles by changing their type to 'section'.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first. Returns blocks.
    """
    for block in blocks:
        # Searching for a two-line paragraph that looks like this:
        #
        # +------------------------------+
        # | Section title                |
        # | -------------                |
        # +------------------------------+
        if block['type'] != 'paragraph':
            continue
        lines = block['lines']
        if len(lines) != 2:
            continue
        title, underline = lines
        # The underline must be exactly as long as the title.
        if underline == '-' * len(title):
            block['type'] = 'section'
    return blocks
246 246
247 247
def inlineliterals(blocks):
    """Replace the `` inline literal markers in paragraphs with ".

    Only blocks of type 'paragraph' are touched; their 'lines' list is
    replaced by a new list. Returns blocks.
    """
    for block in blocks:
        if block['type'] != 'paragraph':
            continue
        block['lines'] = [line.replace('``', '"') for line in block['lines']]
    return blocks
253 253
254 254
def addmargins(blocks):
    """Insert empty 'margin' blocks for vertical spacing.

    Adjacent blocks of the same type are kept together without a
    margin when that type is 'bullet', 'option' or 'field'; a margin
    block is inserted between every other pair of neighbouring blocks.
    The list is modified in place and returned.
    """
    grouped = ('bullet', 'option', 'field')
    pos = 1
    while pos < len(blocks):
        current = blocks[pos]['type']
        if current in grouped and current == blocks[pos - 1]['type']:
            pos += 1
            continue
        blocks.insert(pos, dict(lines=[''], indent=0, type='margin'))
        # Step over both the new margin and the block it precedes.
        pos += 2
    return blocks
270 270
271 271
def formatblock(block, width):
    """Format a block according to width and return it as a string.

    block is a dictionary with 'type', 'indent' and 'lines' fields;
    'field' blocks additionally need 'key' and 'keywidth' (as set by
    updatefieldlists). A width of zero or less means 78 columns.

    Margin blocks give an empty string, literal and section blocks are
    only indented, and all other block types are re-wrapped with
    textwrap.fill. The block passed in is never modified, so the same
    block can safely be formatted more than once.
    """
    if width <= 0:
        width = 78
    indent = ' ' * block['indent']
    blocktype = block['type']
    # Work on a copy: the bullet and field branches below rewrite the
    # first line, and that must not leak back into the caller's block.
    lines = list(block['lines'])

    if blocktype == 'margin':
        return ''
    if blocktype == 'literal':
        indent += ' '
        return indent + ('\n' + indent).join(lines)
    if blocktype == 'section':
        return indent + ('\n' + indent).join(lines)
    if blocktype == 'definition':
        term = indent + lines[0]
        # The definition keeps the hanging indent of its last line.
        hang = len(lines[-1]) - len(lines[-1].lstrip())
        defindent = indent + hang * ' '
        text = ' '.join(map(str.strip, lines[1:]))
        return "%s\n%s" % (term, textwrap.fill(text, width=width,
                                               initial_indent=defindent,
                                               subsequent_indent=defindent))

    subindent = indent
    if blocktype == 'bullet':
        if lines[0].startswith('| '):
            # Remove bullet for line blocks and add no extra
            # indention.
            lines[0] = lines[0][2:]
        else:
            # Continuation lines are aligned with the text that
            # follows the bullet.
            m = _bulletre.match(lines[0])
            subindent = indent + m.end() * ' '
    elif blocktype == 'field':
        keywidth = block['keywidth']
        key = block['key']

        subindent = indent + _fieldwidth * ' '
        if len(key) + 2 > _fieldwidth:
            # key too large, use full line width
            key = key.ljust(width)
        elif keywidth + 2 < _fieldwidth:
            # all keys are small, add only two spaces
            key = key.ljust(keywidth + 2)
            subindent = indent + (keywidth + 2) * ' '
        else:
            # mixed sizes, use fieldwidth for this one
            key = key.ljust(_fieldwidth)
        lines[0] = key + lines[0]
    elif blocktype == 'option':
        m = _optionre.match(lines[0])
        option, arg = m.group(1, 2)
        subindent = indent + (len(option) + len(arg)) * ' '

    text = ' '.join(map(str.strip, lines))
    return textwrap.fill(text, width=width,
                         initial_indent=indent,
                         subsequent_indent=subindent)
326 326
327 327
def format(text, width, indent=0, keep=None):
    """Parse text and format it according to width.

    indent is added to the indentation of every block. keep lists the
    container types that must not be pruned.

    Returns the formatted text when keep is None; otherwise returns a
    tuple (text, pruned) where pruned lists the pruned container types.
    """
    blocks = findblocks(text)
    for block in blocks:
        block['indent'] += indent
    blocks = findliteralblocks(blocks)
    blocks, pruned = prunecontainers(blocks, keep or [])
    # The remaining passes all take and return the block list.
    for step in (inlineliterals, splitparagraphs, updatefieldlists,
                 findsections, addmargins):
        blocks = step(blocks)
    rendered = [formatblock(b, width) for b in blocks]
    text = '\n'.join(rendered)
    if keep is not None:
        return text, pruned
    return text
345 345
346 346
if __name__ == "__main__":
    # Debugging aid: run every parsing pass on the file named by the
    # first argument, dumping the block list after each pass. Any
    # further arguments are the container types to keep.
    from pprint import pprint

    def debug(func, *args):
        """Call func(*args), pretty-print its result and return it."""
        result = func(*args)
        print("*** after %s:" % func.__name__)
        pprint(result)
        print('')
        return result

    fp = open(sys.argv[1])
    try:
        text = fp.read()
    finally:
        fp.close()
    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    # prunecontainers returns (blocks, pruned); only the block list
    # may be handed on to the following passes.
    blocks, pruned = debug(prunecontainers, blocks, sys.argv[2:])
    blocks = debug(inlineliterals, blocks)
    blocks = debug(splitparagraphs, blocks)
    blocks = debug(updatefieldlists, blocks)
    blocks = debug(findsections, blocks)
    blocks = debug(addmargins, blocks)
    print('\n'.join(formatblock(b, 30) for b in blocks))
General Comments 0
You need to be logged in to leave comments. Login now