##// END OF EJS Templates
minirst: convert ``foo`` into "foo" upon display...
Martin Geisler -
r9623:32727ce0 default
parent child Browse files
Show More
@@ -1,347 +1,355 b''
1 1 # minirst.py - minimal reStructuredText parser
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2, incorporated herein by reference.
7 7
8 8 """simplified reStructuredText parser.
9 9
10 10 This parser knows just enough about reStructuredText to parse the
11 11 Mercurial docstrings.
12 12
13 13 It cheats in a major way: nested blocks are not really nested. They
14 14 are just indented blocks that look like they are nested. This relies
15 15 on the user to keep the right indentation for the blocks.
16 16
17 17 It only supports a small subset of reStructuredText:
18 18
19 19 - paragraphs
20 20
21 21 - definition lists (must use ' ' to indent definitions)
22 22
23 23 - lists (items must start with '-')
24 24
25 25 - field lists (colons cannot be escaped)
26 26
27 27 - literal blocks
28 28
29 29 - option lists (supports only long options without arguments)
30 30
31 31 - inline literals (``foo`` is displayed as "foo"; no other inline markup is recognized)
32 32 """
33 33
34 34 import re, sys, textwrap
35 35
36 36
def findblocks(text):
    """Split text into blocks of consecutive non-blank lines.

    Returns a list of dictionaries, one per block. Each dictionary has
    an 'indent' field (the smallest leading whitespace width found in
    the block) and a 'lines' field (the lines with that common
    indentation removed).
    """
    groups = []
    current = []
    for raw in text.splitlines():
        if raw.strip():
            current.append(raw)
        elif current:
            # A blank line terminates the block collected so far.
            groups.append(current)
            current = []
    if current:
        groups.append(current)

    result = []
    for group in groups:
        margin = min([len(raw) - len(raw.lstrip()) for raw in group])
        result.append({'indent': margin,
                       'lines': [raw[margin:] for raw in group]})
    return result
57 57
58 58
def findliteralblocks(blocks):
    """Find literal blocks and add a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'. The list is modified in place (the
    "::" markers are stripped, and a paragraph consisting only of "::"
    is removed) and returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i + 1 < len(blocks):
            indent = blocks[i]['indent']
            adjustment = blocks[i + 1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block
                del blocks[i]
                i -= 1
            elif blocks[i]['lines'][-1].endswith(' ::'):
                # Partially minimized form: remove space and both
                # colons.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
            else:
                # Fully minimized form: remove just one colon.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            #
            # When the removed "::" block was the very first block, i is
            # now -1 and there is no preceding block to inspect; without
            # the guard, blocks[-1] would silently examine the *last*
            # block of the document instead.
            if i >= 0 and blocks[i]['lines'][0].startswith('- '):
                indent += 2
                adjustment -= 2

            # Mark the following indented blocks.
            while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                blocks[i + 1]['type'] = 'literal'
                blocks[i + 1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks
108 108
109 109
def findsections(blocks):
    """Mark section titles.

    A paragraph made of exactly two lines, where the second line is a
    run of '-' as long as the first line, gets the type 'section'.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    for candidate in blocks:
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | Section title                |
        # | -------------                |
        # +------------------------------+
        if candidate['type'] != 'paragraph':
            continue
        rows = candidate['lines']
        if len(rows) != 2:
            continue
        title, underline = rows
        if underline == '-' * len(title):
            candidate['type'] = 'section'
    return blocks
128 128
129 129
def findbulletlists(blocks):
    """Split paragraphs that start with "- " into 'bullet' blocks.

    Every line starting with "- " opens a new bullet item (the marker
    is removed); the other lines are appended to the current item
    unchanged. The list is modified in place and returned.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    pos = 0
    while pos < len(blocks):
        # Searching for a paragraph that looks like this:
        #
        # +------+-----------------------+
        # | "- " | list item             |
        # +------| (body elements)+      |
        #        +-----------------------+
        current = blocks[pos]
        rows = current['lines']
        if current['type'] != 'paragraph' or not rows[0].startswith('- '):
            pos += 1
            continue

        bullets = []
        for row in rows:
            if row[:2] == '- ':
                bullets.append({'type': 'bullet', 'lines': [],
                                'indent': current['indent']})
                row = row[2:]
            bullets[-1]['lines'].append(row)
        # Replace the paragraph by its items and continue after them.
        blocks[pos:pos + 1] = bullets
        pos += len(bullets)
    return blocks
157 157
158 158
_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
def findoptionlists(blocks):
    """Split paragraphs that start with a long option into 'option' blocks.

    Each line matching _optionre opens a new option item; its 'width'
    is the length of the option together with its argument and the
    spaces that follow. Other lines are appended to the current item.
    The list is modified in place and returned.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    pos = 0
    while pos < len(blocks):
        # Searching for a paragraph that looks like this:
        #
        # +----------------------------+-------------+
        # | "--" option "  "           | description |
        # +-------+--------------------+             |
        #         | (body elements)+                 |
        #         +----------------------------------+
        current = blocks[pos]
        rows = current['lines']
        if current['type'] != 'paragraph' or not _optionre.match(rows[0]):
            pos += 1
            continue

        entries = []
        for row in rows:
            found = _optionre.match(row)
            if found:
                entries.append({
                    'type': 'option',
                    'lines': [],
                    'indent': current['indent'],
                    'width': len(found.group(1)) + len(found.group(2)),
                })
            entries[-1]['lines'].append(row)
        # Replace the paragraph by its items and continue after them.
        blocks[pos:pos + 1] = entries
        pos += len(entries)
    return blocks
191 191
192 192
_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):( +)(.*)')
def findfieldlists(blocks):
    """Split paragraphs that start with ":name:" into 'field' blocks.

    Each line matching _fieldre opens a new field item. The colons
    around the field name are dropped, and the item's 'width' records
    the column at which the field body starts so that wrapped lines can
    be aligned with it. Other lines are appended to the current item
    unchanged. The list is modified in place and returned.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    i = 0
    while i < len(blocks):
        # Searching for a paragraph that looks like this:
        #
        #
        # +--------------------+----------------------+
        # | ":" field name ":" | field body           |
        # +-------+------------+                      |
        #         | (body elements)+                  |
        #         +-----------------------------------+
        if (blocks[i]['type'] == 'paragraph' and
            _fieldre.match(blocks[i]['lines'][0])):
            indent = blocks[i]['indent']
            fields = []
            for line in blocks[i]['lines']:
                m = _fieldre.match(line)
                if m:
                    key, spaces, rest = m.groups()
                    # Length of the ":key:" prefix plus its separator.
                    width = 2 + len(key) + len(spaces)
                    fields.append(dict(type='field', lines=[],
                                       indent=indent, width=width))
                    # Turn ":foo: bar" into "foo   bar". The two removed
                    # colons are replaced by two spaces so that the body
                    # still starts at column 'width' and lines up with
                    # the hanging indent used for wrapped lines.
                    line = '%s  %s%s' % (key, spaces, rest)
                fields[-1]['lines'].append(line)
            blocks[i:i + 1] = fields
            i += len(fields) - 1
        i += 1
    return blocks
228 228
229 229
def finddefinitionlists(blocks):
    """Split term/definition paragraphs into 'definition' blocks.

    A paragraph qualifies when it has more than one line, its first
    line is not indented and its second line is. Every unindented line
    opens a new item; each item's 'hang' field ends up holding the
    indentation of the item's last line. The list is modified in place
    and returned.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    pos = 0
    while pos < len(blocks):
        # Searching for a paragraph that looks like this:
        #
        # +----------------------------+
        # | term                       |
        # +--+-------------------------+--+
        #    | definition                 |
        #    | (body elements)+           |
        #    +----------------------------+
        current = blocks[pos]
        rows = current['lines']
        is_definition_list = (current['type'] == 'paragraph'
                              and len(rows) > 1
                              and not rows[0].startswith(' ')
                              and rows[1].startswith(' '))
        if is_definition_list:
            entries = []
            for row in rows:
                if row[:1] != ' ':
                    entries.append({'type': 'definition', 'lines': [],
                                    'indent': current['indent']})
                entry = entries[-1]
                entry['lines'].append(row)
                # Overwritten on every line, so the last line wins.
                entry['hang'] = len(row) - len(row.lstrip())
            blocks[pos:pos + 1] = entries
            pos += len(entries)
        else:
            pos += 1
    return blocks
261 261
262 262
def inlineliterals(blocks):
    """Show inline literals in paragraphs with plain double quotes.

    Every "``" in the lines of a block of type 'paragraph' is replaced
    by a single '"'. Blocks of any other type are left untouched.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first.
    """
    for block in blocks:
        if block['type'] != 'paragraph':
            continue
        quoted = []
        for line in block['lines']:
            quoted.append(line.replace('``', '"'))
        block['lines'] = quoted
    return blocks
268
269
def addmargins(blocks):
    """Add empty 'margin' blocks for vertical spacing.

    Consecutive bullets, options, fields, and definitions of the same
    type are kept together with no vertical space between them; a
    margin block is placed between all other neighbouring blocks. The
    list is modified in place and returned.
    """
    grouped = ('bullet', 'option', 'field', 'definition')
    spaced = []
    previous = None
    for block in blocks:
        if previous is not None:
            kind = block['type']
            if not (kind == previous['type'] and kind in grouped):
                spaced.append(dict(lines=[''], indent=0, type='margin'))
        spaced.append(block)
        previous = block
    blocks[:] = spaced
    return blocks
278 285
279 286
def formatblock(block, width):
    """Format a block according to width.

    block is a dictionary with 'type', 'indent' and 'lines' fields;
    definitions also carry 'hang', options and fields carry 'width'.
    A width of zero or less selects the default of 78 columns.

    Returns the formatted text of the block as a single string (an
    empty string for margins).
    """
    if width <= 0:
        width = 78
    indent = ' ' * block['indent']
    if block['type'] == 'margin':
        return ''
    elif block['type'] == 'literal':
        # Literal text is emitted verbatim, never re-wrapped.
        indent += ' '
        return indent + ('\n' + indent).join(block['lines'])
    elif block['type'] == 'section':
        return indent + ('\n' + indent).join(block['lines'])
    elif block['type'] == 'definition':
        term = indent + block['lines'][0]
        defindent = indent + block['hang'] * ' '
        text = ' '.join(l.strip() for l in block['lines'][1:])
        return "%s\n%s" % (term, textwrap.fill(text, width=width,
                                               initial_indent=defindent,
                                               subsequent_indent=defindent))
    else:
        initindent = subindent = indent
        text = ' '.join(l.strip() for l in block['lines'])
        if block['type'] == 'bullet':
            initindent = indent + '- '
            # The hanging indent must be as wide as the "- " marker so
            # that wrapped lines line up with the item text.
            subindent = indent + '  '
        elif block['type'] in ('option', 'field'):
            subindent = indent + block['width'] * ' '

        return textwrap.fill(text, width=width,
                             initial_indent=initindent,
                             subsequent_indent=subindent)
311 318
312 319
def format(text, width, indent=0):
    """Parse text and return it formatted for the given width.

    indent is added to the indentation of every block found in text
    before the remaining parser passes run.
    """
    blocks = findblocks(text)
    for block in blocks:
        block['indent'] += indent

    # The passes run in this order; each takes and returns the block
    # list.
    passes = (
        findliteralblocks,
        inlineliterals,
        findsections,
        findbulletlists,
        findoptionlists,
        findfieldlists,
        finddefinitionlists,
        addmargins,
    )
    for parserpass in passes:
        blocks = parserpass(blocks)

    return '\n'.join(formatblock(block, width) for block in blocks)
326 334
327 335
if __name__ == "__main__":
    # Debugging aid: parse the file named on the command line, dump the
    # block list after every parser pass, and finally print the text
    # formatted for a width of 30 columns.
    from pprint import pprint

    def debug(func, blocks):
        """Apply one parser pass and dump the resulting blocks."""
        blocks = func(blocks)
        # Single-argument parenthesised prints behave the same whether
        # print is a statement or a function.
        print("*** after %s:" % func.__name__)
        pprint(blocks)
        print("")
        return blocks

    fp = open(sys.argv[1])
    try:
        text = fp.read()
    finally:
        fp.close()
    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    # Same pass order as format(), so the dump reflects the real output.
    blocks = debug(inlineliterals, blocks)
    blocks = debug(findsections, blocks)
    blocks = debug(findbulletlists, blocks)
    blocks = debug(findoptionlists, blocks)
    blocks = debug(findfieldlists, blocks)
    blocks = debug(finddefinitionlists, blocks)
    blocks = debug(addmargins, blocks)
    print('\n'.join(formatblock(b, 30) for b in blocks))
General Comments 0
You need to be logged in to leave comments. Login now