##// END OF EJS Templates
minirst: update module docstring
Martin Geisler -
r9741:245689e7 default
parent child Browse files
Show More
@@ -1,275 +1,279 b''
1 1 # minirst.py - minimal reStructuredText parser
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2, incorporated herein by reference.
7 7
8 8 """simplified reStructuredText parser.
9 9
10 10 This parser knows just enough about reStructuredText to parse the
11 11 Mercurial docstrings.
12 12
13 13 It cheats in a major way: nested blocks are not really nested. They
14 14 are just indented blocks that look like they are nested. This relies
15 15 on the user to keep the right indentation for the blocks.
16 16
17 17 It only supports a small subset of reStructuredText:
18 18
19 - sections
20
19 21 - paragraphs
20 22
21 - definition lists (must use ' ' to indent definitions)
23 - literal blocks
24
25 - definition lists
22 26
23 - lists (items must start with '-')
27 - bullet lists (items must start with '-')
28
29 - enumerated lists (no autonumbering)
24 30
25 31 - field lists (colons cannot be escaped)
26 32
27 - literal blocks
28
29 33 - option lists (supports only long options without arguments)
30 34
31 - inline markup is not recognized at all.
35 - inline literals (no other inline markup is recognized)
32 36 """
33 37
34 38 import re, sys, textwrap
35 39
36 40
def findblocks(text):
    """Split text into contiguous blocks of non-blank lines.

    Blocks are separated by one or more blank (empty or
    whitespace-only) lines.

    Returns a list of dictionaries representing the blocks. Each block
    has an 'indent' field (the smallest indentation found on any line
    of the block) and a 'lines' field (the lines of the block with that
    common indentation removed).
    """
    groups = []
    current = []
    for line in text.splitlines():
        if line.strip():
            current.append(line)
        elif current:
            # A blank line closes the block collected so far; runs of
            # blank lines collapse into a single separator.
            groups.append(current)
            current = []
    if current:
        groups.append(current)

    result = []
    for group in groups:
        margin = min([len(l) - len(l.lstrip()) for l in group])
        result.append(dict(indent=margin, lines=[l[margin:] for l in group]))
    return result
57 61
58 62
def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'.

    A paragraph whose last line ends with '::' introduces the literal
    blocks that follow it: every subsequent block that is indented
    deeper than the paragraph is marked as literal. The '::' marker is
    removed from the paragraph (and a paragraph consisting only of
    '::' is dropped entirely).

    The list is modified in place and also returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        blocks[i]['type'] = 'paragraph'
        if blocks[i]['lines'][-1].endswith('::') and i+1 < len(blocks):
            indent = blocks[i]['indent']
            adjustment = blocks[i+1]['indent'] - indent

            if blocks[i]['lines'] == ['::']:
                # Expanded form: remove block
                del blocks[i]
                i -= 1
            elif blocks[i]['lines'][-1].endswith(' ::'):
                # Partially minimized form: remove space and both
                # colons.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-3]
            else:
                # Fully minimized form: remove just one colon.
                blocks[i]['lines'][-1] = blocks[i]['lines'][-1][:-1]

            # List items are formatted with a hanging indent. We must
            # correct for this here while we still have the original
            # information on the indentation of the subsequent literal
            # blocks available.
            #
            # If the very first block was an expanded '::' marker, it
            # has just been removed and i is now -1. There is no
            # preceding block in that case, and blocks[-1] would
            # silently wrap around to the *last* block, so skip the
            # lookup.
            m = None
            if i >= 0:
                m = _bulletre.match(blocks[i]['lines'][0])
            if m:
                indent += m.end()
                adjustment -= m.end()

            # Mark the following indented blocks.
            while i+1 < len(blocks) and blocks[i+1]['indent'] > indent:
                blocks[i+1]['type'] = 'literal'
                blocks[i+1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks

_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)) ')
_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):( +)(.*)')
_definitionre = re.compile(r'[^ ]')
114 118
def splitparagraphs(blocks):
    """Split paragraphs that are really lists into one block per item.

    Each block of type 'paragraph' whose first line starts a list item
    is replaced, in place, by a sequence of blocks of type 'bullet',
    'option', 'field' or 'definition' -- one per item -- that keep the
    indentation of the paragraph they came from. Other blocks are left
    untouched. The (modified) list is returned.
    """
    # Tuples with (list type, item regexp, single line items?). Order
    # matters: definition lists have the least specific regexp and must
    # come last.
    kinds = (('bullet', _bulletre, True),
             ('option', _optionre, True),
             ('field', _fieldre, True),
             ('definition', _definitionre, False))

    def startsitem(lines, pos, itemre, singleline):
        """Does itemre match an item at line pos?

        A list item can be followed by an indented line or another
        list item (but only if singleline is True).
        """
        if not itemre.match(lines[pos]):
            return False
        if pos + 1 < len(lines):
            following = lines[pos + 1]
        else:
            following = ''
        if not singleline:
            return following.startswith(' ')
        return (following == '' or following[0] == ' '
                or itemre.match(following))

    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        if block['type'] == 'paragraph':
            lines = block['lines']
            for kind, itemre, singleline in kinds:
                if not startsitem(lines, 0, itemre, singleline):
                    continue
                # The first line always opens an item, so items is
                # never empty when a continuation line is appended.
                items = []
                for n, line in enumerate(lines):
                    if startsitem(lines, n, itemre, singleline):
                        items.append(dict(type=kind, lines=[],
                                          indent=block['indent']))
                    items[-1]['lines'].append(line)
                blocks[pos:pos+1] = items
                break
        pos += 1
    return blocks
156 160
157 161
def findsections(blocks):
    """Mark underlined titles as sections.

    A block of type 'paragraph' that consists of exactly two lines,
    the second being a run of '-' characters exactly as long as the
    first, has its type changed to 'section'.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first. The list is updated in place
    and returned.
    """
    for block in blocks:
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | Section title                |
        # | -------------                |
        # +------------------------------+
        if block['type'] != 'paragraph':
            continue
        lines = block['lines']
        if len(lines) != 2:
            continue
        title, underline = lines
        if underline == '-' * len(title):
            block['type'] = 'section'
    return blocks
176 180
177 181
def inlineliterals(blocks):
    """Replace inline literal markers with plain double quotes.

    Every occurrence of '``' in the lines of a block of type
    'paragraph' is replaced by '"'. Blocks of any other type are left
    alone. The list is updated in place and returned.
    """
    for block in blocks:
        if block['type'] != 'paragraph':
            continue
        quoted = []
        for line in block['lines']:
            quoted.append(line.replace('``', '"'))
        block['lines'] = quoted
    return blocks
183 187
184 188
def addmargins(blocks):
    """Adds empty blocks for vertical spacing.

    Consecutive blocks of the same list type (bullets, options, fields
    or definitions) are kept together with no vertical space between
    them; between every other pair of neighbouring blocks an empty
    block of type 'margin' is inserted.

    The list is updated in place and returned.
    """
    grouped = ('bullet', 'option', 'field', 'definition')
    spaced = []
    previous = None
    for current in blocks:
        if previous is not None:
            samekind = current['type'] == previous['type']
            if not (samekind and current['type'] in grouped):
                spaced.append(dict(lines=[''], indent=0, type='margin'))
        spaced.append(current)
        previous = current
    # Write the result back into the caller's list object.
    blocks[:] = spaced
    return blocks
200 204
201 205
def formatblock(block, width):
    """Format a block according to width.

    block is a dictionary with 'type', 'indent' and 'lines' fields and
    width is the maximum line width used when wrapping; a width of
    zero or less selects the default of 78 columns.

    Returns the formatted text of the block as a single string:

    - 'margin' blocks become the empty string,
    - 'literal' and 'section' blocks are emitted line by line without
      any wrapping,
    - 'definition' blocks keep the term on its own line and wrap the
      definition below it, indented like its last line,
    - all other blocks are joined and re-wrapped, with a hanging
      indent for bullet, field and option items.

    The block passed in is not modified.
    """
    if width <= 0:
        width = 78
    indent = ' ' * block['indent']
    lines = block['lines']
    if block['type'] == 'margin':
        return ''
    if block['type'] == 'literal':
        indent += ' '
        return indent + ('\n' + indent).join(lines)
    if block['type'] == 'section':
        return indent + ('\n' + indent).join(lines)
    if block['type'] == 'definition':
        term = indent + lines[0]
        hang = len(lines[-1]) - len(lines[-1].lstrip())
        defindent = indent + hang * ' '
        text = ' '.join([l.strip() for l in lines[1:]])
        return "%s\n%s" % (term, textwrap.fill(text, width=width,
                                               initial_indent=defindent,
                                               subsequent_indent=defindent))
    initindent = subindent = indent
    if block['type'] == 'bullet':
        m = _bulletre.match(lines[0])
        if m:
            subindent = indent + m.end() * ' '
    elif block['type'] == 'field':
        m = _fieldre.match(lines[0])
        if m:
            key, spaces, rest = m.groups()
            # Turn ":foo: bar" into "foo   bar": the two colons are
            # replaced by two spaces so that the text stays in the
            # column that subindent (computed below) points at. A
            # copy of the lines is made so the caller's block is left
            # untouched and can be formatted again.
            lines = ['%s  %s%s' % (key, spaces, rest)] + lines[1:]
            subindent = indent + (2 + len(key) + len(spaces)) * ' '
    elif block['type'] == 'option':
        m = _optionre.match(lines[0])
        if m:
            option, arg, rest = m.groups()
            subindent = indent + (len(option) + len(arg)) * ' '

    text = ' '.join([l.strip() for l in lines])
    return textwrap.fill(text, width=width,
                         initial_indent=initindent,
                         subsequent_indent=subindent)
244 248
245 249
def format(text, width, indent=0):
    """Parse and format the text according to width.

    text is split into blocks, each block is shifted right by indent
    columns, the blocks are classified by the parsing passes, and the
    result is rendered with formatblock and joined with newlines.
    """
    blocks = findblocks(text)
    for block in blocks:
        block['indent'] = block['indent'] + indent
    # The passes must run in this order; each takes and returns the
    # list of blocks.
    passes = (findliteralblocks, inlineliterals, splitparagraphs,
              findsections, addmargins)
    for step in passes:
        blocks = step(blocks)
    return '\n'.join([formatblock(block, width) for block in blocks])
257 261
258 262
if __name__ == "__main__":
    # Debugging aid: run the parser on the file named by the first
    # command line argument, dumping the blocks after every pass and
    # finally printing the text formatted to a width of 30 columns.
    from pprint import pprint

    def debug(func, blocks):
        """Apply func to blocks, dump the result and return it."""
        blocks = func(blocks)
        # Single-argument print(...) behaves the same whether print is
        # a statement or a function.
        print("*** after %s:" % func.__name__)
        pprint(blocks)
        print("")
        return blocks

    # try/finally rather than a with-statement, so the file is closed
    # without requiring a newer interpreter than the rest of the module.
    fp = open(sys.argv[1])
    try:
        text = fp.read()
    finally:
        fp.close()
    # Same sequence of passes as format().
    blocks = debug(findblocks, text)
    blocks = debug(findliteralblocks, blocks)
    blocks = debug(inlineliterals, blocks)
    blocks = debug(splitparagraphs, blocks)
    blocks = debug(findsections, blocks)
    blocks = debug(addmargins, blocks)
    print('\n'.join(formatblock(b, 30) for b in blocks))
General Comments 0
You need to be logged in to leave comments. Login now