##// END OF EJS Templates
minirst: handle line breaks in hg role
Martin Geisler -
r11192:babf9a5f default
parent child Browse files
Show More
@@ -1,382 +1,385 b''
1 1 # minirst.py - minimal reStructuredText parser
2 2 #
3 3 # Copyright 2009, 2010 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """simplified reStructuredText parser.
9 9
10 10 This parser knows just enough about reStructuredText to parse the
11 11 Mercurial docstrings.
12 12
13 13 It cheats in a major way: nested blocks are not really nested. They
14 14 are just indented blocks that look like they are nested. This relies
15 15 on the user to keep the right indentation for the blocks.
16 16
17 17 It only supports a small subset of reStructuredText:
18 18
19 19 - sections
20 20
21 21 - paragraphs
22 22
23 23 - literal blocks
24 24
25 25 - definition lists
26 26
27 27 - bullet lists (items must start with '-')
28 28
29 29 - enumerated lists (no autonumbering)
30 30
31 31 - field lists (colons cannot be escaped)
32 32
33 33 - option lists (supports only long options without arguments)
34 34
35 35 - inline literals (no other inline markup is recognized)
36 36 """
37 37
38 38 import re, sys, textwrap
39 39
40 40
def findblocks(text):
    """Find continuous blocks of lines in text.

    A block is a run of consecutive lines that contain something other
    than whitespace; lines that are empty or whitespace-only separate
    blocks and are dropped.

    Returns a list of dictionaries representing the blocks. Each block
    has an 'indent' field (the smallest leading-whitespace width among
    its lines) and a 'lines' field (the lines with that many leading
    characters removed).
    """
    groups = [[]]
    for line in text.splitlines():
        if line.strip():
            groups[-1].append(line)
        elif groups[-1]:
            # Only open a new group once the current one has content,
            # so consecutive blank lines do not create empty groups.
            groups.append([])
    if not groups[-1]:
        # Trailing blank lines (or empty input) leave an empty group.
        del groups[-1]

    blocks = []
    for group in groups:
        indent = min(len(l) - len(l.lstrip()) for l in group)
        blocks.append(dict(indent=indent,
                           lines=[l[indent:] for l in group]))
    return blocks
61 61
62 62
def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'. The list is modified in place (a block
    consisting only of '::' is removed) and returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block
                del blocks[i]
                i -= 1
            elif blocks[i]['lines'][-1].endswith(' ::'):
                # Partially minimized form: remove space and both
                # colons.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
            else:
                # Fully minimized form: remove just one colon.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            #
            # When the removed '::' block was the very first block, i is
            # now -1 and there is no preceding block to inspect;
            # indexing blocks[-1] would wrongly look at the last block.
            if i >= 0:
                m = _bulletre.match(blocks[i]['lines'][0])
                if m:
                    indent += m.end()
                    adjustment -= m.end()

            # Mark the following indented blocks.
            while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                blocks[i + 1]['type'] = 'literal'
                blocks[i + 1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks

# Regular expressions recognizing the first line of each list item kind.
_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)|\|) ')
_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):[ ]+(.*)')
_definitionre = re.compile(r'[^ ]')
118 118
def splitparagraphs(blocks):
    """Split paragraphs into lists.

    Every block of type 'paragraph' whose first line looks like a list
    item is replaced, in place, by one block per list item. The new
    blocks get the list type ('bullet', 'option', 'field' or
    'definition') and inherit the indent of the paragraph they came
    from. The modified list is returned.
    """
    # Tuples with (list type, item regexp, single line items?). Order
    # matters: definition lists has the least specific regexp and must
    # come last.
    listtypes = [('bullet', _bulletre, True),
                 ('option', _optionre, True),
                 ('field', _fieldre, True),
                 ('definition', _definitionre, False)]

    def match(lines, i, itemre, singleline):
        """Does itemre match an item at line i?

        A list item can be followed by an indented line or another list
        item (but only if singleline is True).
        """
        line1 = lines[i]
        if i + 1 < len(lines):
            line2 = lines[i + 1]
        else:
            line2 = ''
        if not itemre.match(line1):
            return False
        if singleline:
            return bool(line2 == '' or line2[0] == ' '
                        or itemre.match(line2))
        return line2.startswith(' ')

    i = 0
    while i < len(blocks):
        if blocks[i]['type'] == 'paragraph':
            lines = blocks[i]['lines']
            for listtype, itemre, singleline in listtypes:
                if not match(lines, 0, itemre, singleline):
                    continue
                items = []
                for j, line in enumerate(lines):
                    if match(lines, j, itemre, singleline):
                        # Line j starts a new item; the following
                        # non-matching lines are appended to it.
                        items.append(dict(type=listtype, lines=[],
                                          indent=blocks[i]['indent']))
                    items[-1]['lines'].append(line)
                blocks[i:i + 1] = items
                break
        i += 1
    return blocks
160 160
161 161
162 162 _fieldwidth = 12
163 163
164 164 def updatefieldlists(blocks):
165 165 """Find key and maximum key width for field lists."""
166 166 i = 0
167 167 while i < len(blocks):
168 168 if blocks[i]['type'] != 'field':
169 169 i += 1
170 170 continue
171 171
172 172 keywidth = 0
173 173 j = i
174 174 while j < len(blocks) and blocks[j]['type'] == 'field':
175 175 m = _fieldre.match(blocks[j]['lines'][0])
176 176 key, rest = m.groups()
177 177 blocks[j]['lines'][0] = rest
178 178 blocks[j]['key'] = key
179 179 keywidth = max(keywidth, len(key))
180 180 j += 1
181 181
182 182 for block in blocks[i:j]:
183 183 block['keywidth'] = keywidth
184 184 i = j + 1
185 185
186 186 return blocks
187 187
188 188
def prunecontainers(blocks, keep):
    """Prune unwanted containers.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    Every ".. container:: type" block is removed. If its type is listed
    in keep, the more deeply indented blocks that follow it are kept and
    shifted left by the extra indentation of the first of them;
    otherwise they are removed too.

    Returns a tuple (blocks, pruned) where blocks is the list that was
    passed in (modified in place) and pruned lists the container types
    that were removed.
    """
    pruned = []
    i = 0
    while i + 1 < len(blocks):
        # Searching for a block that looks like this:
        #
        # +-------+---------------------------+
        # | ".. container ::" type            |
        # +---+                               |
        #     | blocks                        |
        #     +-------------------------------+
        if not (blocks[i]['type'] == 'paragraph' and
                blocks[i]['lines'][0].startswith('.. container::')):
            i += 1
            continue

        indent = blocks[i]['indent']
        adjustment = blocks[i + 1]['indent'] - indent
        containertype = blocks[i]['lines'][0][15:]
        prune = containertype not in keep
        if prune:
            pruned.append(containertype)

        # Always delete "..container:: type" block
        del blocks[i]
        j = i
        while j < len(blocks) and blocks[j]['indent'] > indent:
            if prune:
                del blocks[j]
            else:
                blocks[j]['indent'] -= adjustment
                j += 1
        # Do not advance i: after the deletion, position i holds the
        # block that followed the container marker (the first contained
        # block if it was kept), which may itself be a container and
        # must be examined.
    return blocks, pruned
226 226
227 227
# A section underline: one punctuation character repeated at least twice.
_sectionre = re.compile(r"""^([-=`:.'"~^_*+#])\1+$""")

def findsections(blocks):
    """Finds sections.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.

    A two-line paragraph whose second line is an underline of exactly
    the same length as the first line becomes a block of type 'section':
    the underline character is stored under 'underline' and the
    underline itself is removed from 'lines'. The blocks are modified in
    place and returned.
    """
    for block in blocks:
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | Section title                |
        # | -------------                |
        # +------------------------------+
        if block['type'] != 'paragraph':
            continue
        lines = block['lines']
        if (len(lines) == 2 and
            len(lines[0]) == len(lines[1]) and
            _sectionre.match(lines[1])):
            block['underline'] = lines[1][0]
            block['type'] = 'section'
            del lines[1]
    return blocks
251 251
252 252
def inlineliterals(blocks):
    """Replace inline literal markers with double quotes.

    In every 'paragraph' and 'section' block, each occurrence of two
    consecutive back-quotes is replaced by a single '"'. Other block
    types are left untouched. The blocks are modified in place and
    returned.
    """
    for b in blocks:
        if b['type'] in ('paragraph', 'section'):
            b['lines'] = [l.replace('``', '"') for l in b['lines']]
    return blocks
258 258
259 259
def hgrole(blocks):
    """Rewrite the :hg: role in paragraphs and sections.

    Only 'paragraph' and 'section' blocks are touched. The blocks are
    modified in place and returned.
    """
    for b in blocks:
        if b['type'] in ('paragraph', 'section'):
            # Turn :hg:`command` into "hg command". This also works
            # when there is a line break in the command and relies on
            # the fact that we have no stray back-quotes in the input
            # (run the blocks through inlineliterals first). Note that
            # every remaining back-quote is turned into a double quote.
            b['lines'] = [l.replace(':hg:`', '"hg ').replace('`', '"')
                          for l in b['lines']]
    return blocks
267 270
268 271
def addmargins(blocks):
    """Adds empty blocks for vertical spacing.

    This groups bullets, options, and fields together with no vertical
    space between them, and adds an empty block of type 'margin'
    between all other blocks. The list is modified in place and
    returned.
    """
    grouped = ('bullet', 'option', 'field')
    i = 1
    while i < len(blocks):
        current = blocks[i]['type']
        if current == blocks[i - 1]['type'] and current in grouped:
            i += 1
        else:
            blocks.insert(i, dict(lines=[''], indent=0, type='margin'))
            # Skip over both the new margin and the block after it.
            i += 2
    return blocks
284 287
285 288
def formatblock(block, width):
    """Format a block according to width.

    block is a dictionary with at least 'type', 'indent' and 'lines';
    'section' blocks also need 'underline' and 'field' blocks need 'key'
    and 'keywidth'. A width of zero or less selects a width of 78.

    Returns the formatted text as a string. The block itself is not
    modified, so formatting the same block again gives the same result.
    """
    if width <= 0:
        width = 78
    indent = ' ' * block['indent']
    blocktype = block['type']
    # Work on a copy: some list types rewrite their first line below and
    # the caller's block must not change as a side effect.
    lines = list(block['lines'])

    if blocktype == 'margin':
        return ''
    if blocktype == 'literal':
        # Literal blocks are emitted verbatim, never re-wrapped.
        indent += ' '
        return indent + ('\n' + indent).join(lines)
    if blocktype == 'section':
        underline = len(lines[0]) * block['underline']
        return "%s%s\n%s%s" % (indent, lines[0], indent, underline)
    if blocktype == 'definition':
        term = indent + lines[0]
        # The definition keeps the hanging indent of its last line.
        hang = len(lines[-1]) - len(lines[-1].lstrip())
        defindent = indent + hang * ' '
        text = ' '.join([l.strip() for l in lines[1:]])
        return "%s\n%s" % (term, textwrap.fill(text, width=width,
                                               initial_indent=defindent,
                                               subsequent_indent=defindent))

    subindent = indent
    if blocktype == 'bullet':
        if lines[0].startswith('| '):
            # Remove bullet for line blocks and add no extra
            # indentation.
            lines[0] = lines[0][2:]
        else:
            m = _bulletre.match(lines[0])
            subindent = indent + m.end() * ' '
    elif blocktype == 'field':
        keywidth = block['keywidth']
        key = block['key']

        subindent = indent + _fieldwidth * ' '
        if len(key) + 2 > _fieldwidth:
            # key too large, use full line width
            key = key.ljust(width)
        elif keywidth + 2 < _fieldwidth:
            # all keys are small, add only two spaces
            key = key.ljust(keywidth + 2)
            subindent = indent + (keywidth + 2) * ' '
        else:
            # mixed sizes, use fieldwidth for this one
            key = key.ljust(_fieldwidth)
        lines[0] = key + lines[0]
    elif blocktype == 'option':
        m = _optionre.match(lines[0])
        option, arg, rest = m.groups()
        subindent = indent + (len(option) + len(arg)) * ' '

    text = ' '.join([l.strip() for l in lines])
    return textwrap.fill(text, width=width,
                         initial_indent=indent,
                         subsequent_indent=subindent)
341 344
342 345
def format(text, width, indent=0, keep=None):
    """Parse and format the text according to width.

    indent is added to the indentation of every block found in text.
    keep lists the container types whose content should be kept; all
    other containers are pruned.

    Returns the formatted text when keep is None, otherwise a tuple
    (formatted text, list of pruned container types).
    """
    blocks = findblocks(text)
    for b in blocks:
        b['indent'] += indent
    blocks = findliteralblocks(blocks)
    blocks, pruned = prunecontainers(blocks, keep or [])
    # Sections are detected before the inline rewrites below, while the
    # title lines are still unchanged.
    blocks = findsections(blocks)
    blocks = inlineliterals(blocks)
    blocks = hgrole(blocks)
    blocks = splitparagraphs(blocks)
    blocks = updatefieldlists(blocks)
    blocks = addmargins(blocks)
    text = '\n'.join(formatblock(b, width) for b in blocks)
    if keep is None:
        return text
    return text, pruned
361 364
362 365
if __name__ == "__main__":
    # Debugging aid: run the file named by the first command line
    # argument through every parsing step, dumping the blocks after each
    # one, and finally print the text formatted to a width of 30. Any
    # further arguments are the container types to keep.
    from pprint import pprint

    def debug(func, *args):
        """Call func(*args), dump its result and return it."""
        blocks = func(*args)
        print("*** after %s:" % func.__name__)
        pprint(blocks)
        print("")
        return blocks

    fp = open(sys.argv[1])
    try:
        text = fp.read()
    finally:
        fp.close()

    # Same steps, in the same order, as format() above.
    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    blocks, pruned = debug(prunecontainers, blocks, sys.argv[2:])
    blocks = debug(findsections, blocks)
    blocks = debug(inlineliterals, blocks)
    blocks = debug(hgrole, blocks)
    blocks = debug(splitparagraphs, blocks)
    blocks = debug(updatefieldlists, blocks)
    blocks = debug(addmargins, blocks)
    print('\n'.join(formatblock(b, 30) for b in blocks))
General Comments 0
You need to be logged in to leave comments. Login now