##// END OF EJS Templates
minirst: run inlineliterals too in debug mode
Martin Geisler -
r10063:a46478b8 default
parent child Browse files
Show More
@@ -1,279 +1,280 b''
1 1 # minirst.py - minimal reStructuredText parser
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2, incorporated herein by reference.
7 7
8 8 """simplified reStructuredText parser.
9 9
10 10 This parser knows just enough about reStructuredText to parse the
11 11 Mercurial docstrings.
12 12
13 13 It cheats in a major way: nested blocks are not really nested. They
14 14 are just indented blocks that look like they are nested. This relies
15 15 on the user to keep the right indentation for the blocks.
16 16
17 17 It only supports a small subset of reStructuredText:
18 18
19 19 - sections
20 20
21 21 - paragraphs
22 22
23 23 - literal blocks
24 24
25 25 - definition lists
26 26
27 27 - bullet lists (items must start with '-')
28 28
29 29 - enumerated lists (no autonumbering)
30 30
31 31 - field lists (colons cannot be escaped)
32 32
33 33 - option lists (supports only long options without arguments)
34 34
35 35 - inline literals (no other inline markup is recognized)
36 36 """
37 37
38 38 import re, sys, textwrap
39 39
40 40
def findblocks(text):
    """Split text into blocks of consecutive non-blank lines.

    Runs of blank (whitespace-only) lines separate the blocks and are
    dropped. Returns a list of dictionaries, one per block, each with
    two fields:

    - 'indent': the smallest amount of leading whitespace found on
      any line of the block

    - 'lines': the lines of the block with that common indentation
      removed
    """
    groups = []
    current = []
    for line in text.splitlines():
        if line.strip():
            current.append(line)
        elif current:
            # A blank line closes the group collected so far.
            groups.append(current)
            current = []
    if current:
        groups.append(current)

    blocks = []
    for group in groups:
        indent = min([len(l) - len(l.lstrip()) for l in group])
        stripped = [l[indent:] for l in group]
        blocks.append(dict(indent=indent, lines=stripped))
    return blocks
61 61
62 62
def findliteralblocks(blocks):
    """Finds literal blocks and adds a 'type' field to the blocks.

    Literal blocks are given the type 'literal', all other blocks are
    given the type 'paragraph'.

    A literal block is introduced by a paragraph whose last line ends
    with "::" and consists of the more deeply indented blocks that
    follow it. Three spellings of the marker are handled:

    - expanded form: the paragraph is just "::" and is removed

    - partially minimized form: the line ends with " ::" and the
      space and both colons are removed

    - fully minimized form: the line ends with "::" and a single
      colon is removed

    The list is modified in place and also returned.
    """
    i = 0
    while i < len(blocks):
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | paragraph                    |
        # | (ends with "::")             |
        # +------------------------------+
        #    +---------------------------+
        #    | indented literal block    |
        #    +---------------------------+
        block = blocks[i]
        block['type'] = 'paragraph'
        if block['lines'][-1].endswith('::') and i + 1 < len(blocks):
            indent = block['indent']
            adjustment = blocks[i + 1]['indent'] - indent

            if block['lines'] == ['::']:
                # Expanded form: remove block. The marker stood on its
                # own, so there is no list item whose hanging indent
                # would have to be compensated for. In particular we
                # must not look at blocks[i] after decrementing i: it
                # is an unrelated block (the previous one, or the
                # *last* one when the marker was the first block).
                del blocks[i]
                i -= 1
            else:
                lastline = block['lines'][-1]
                if lastline.endswith(' ::'):
                    # Partially minimized form: remove space and both
                    # colons.
                    block['lines'][-1] = lastline[:-3]
                else:
                    # Fully minimized form: remove just one colon.
                    block['lines'][-1] = lastline[:-1]

                # List items are formatted with a hanging indent. We
                # must correct for this here while we still have the
                # original information on the indentation of the
                # subsequent literal blocks available.
                m = _bulletre.match(block['lines'][0])
                if m:
                    indent += m.end()
                    adjustment -= m.end()

            # Mark the following indented blocks.
            while i + 1 < len(blocks) and blocks[i + 1]['indent'] > indent:
                blocks[i + 1]['type'] = 'literal'
                blocks[i + 1]['indent'] -= adjustment
                i += 1
        i += 1
    return blocks
113 113
# Item markers for the supported list types. Each pattern is matched
# against the start of a line (with the block indentation removed).
_bulletre = re.compile(r'(-|[0-9A-Za-z]+\.|\(?[0-9A-Za-z]+\)) ')
_optionre = re.compile(r'^(--[a-z-]+)((?:[ =][a-zA-Z][\w-]*)? +)(.*)$')
_fieldre = re.compile(r':(?![: ])([^:]*)(?<! ):( +)(.*)')
_definitionre = re.compile(r'[^ ]')


def splitparagraphs(blocks):
    """Split paragraphs into lists.

    Every block of type 'paragraph' whose first line starts a list
    item is replaced, in place, by one block per item. The new blocks
    get the type 'bullet', 'option', 'field' or 'definition' and
    inherit the indentation of the paragraph they came from. Blocks
    of any other type are left alone. Returns the same list.
    """
    # Tuples with (list type, item regexp, single line items?). Order
    # matters: definition lists have the least specific regexp and
    # must come last.
    listtypes = [('bullet', _bulletre, True),
                 ('option', _optionre, True),
                 ('field', _fieldre, True),
                 ('definition', _definitionre, False)]

    def startsitem(lines, pos, itemre, singleline):
        """Does itemre match an item at line pos?

        A list item can be followed by an indented line or another
        list item (but only if singleline is True).
        """
        if not itemre.match(lines[pos]):
            return False
        if pos + 1 < len(lines):
            following = lines[pos + 1]
        else:
            following = ''
        if not singleline:
            return following.startswith(' ')
        return (following == '' or following[0] == ' '
                or itemre.match(following))

    pos = 0
    while pos < len(blocks):
        block = blocks[pos]
        if block['type'] == 'paragraph':
            lines = block['lines']
            for listtype, itemre, singleline in listtypes:
                if not startsitem(lines, 0, itemre, singleline):
                    continue
                items = []
                for n, line in enumerate(lines):
                    if startsitem(lines, n, itemre, singleline):
                        # This line opens a new item; later lines are
                        # appended to it until the next item starts.
                        items.append(dict(type=listtype, lines=[],
                                          indent=block['indent']))
                    items[-1]['lines'].append(line)
                blocks[pos:pos + 1] = items
                break
        pos += 1
    return blocks
160 160
161 161
def findsections(blocks):
    """Finds sections.

    A paragraph of exactly two lines whose second line consists only
    of dashes and is exactly as long as the first line is retyped as
    'section'.

    The blocks must have a 'type' field, i.e., they should have been
    run through findliteralblocks first. The blocks are updated in
    place and the same list is returned.
    """
    for block in blocks:
        # Searching for a block that looks like this:
        #
        # +------------------------------+
        # | Section title                |
        # | -------------                |
        # +------------------------------+
        if block['type'] != 'paragraph':
            continue
        lines = block['lines']
        if len(lines) != 2:
            continue
        title, underline = lines
        if underline == '-' * len(title):
            block['type'] = 'section'
    return blocks
180 180
181 181
def inlineliterals(blocks):
    """Render inline literals in paragraphs with double quotes.

    Every occurrence of two backticks in the lines of a block of type
    'paragraph' is replaced by a double quote character. Blocks of
    any other type are left untouched. Returns the same list.
    """
    for block in blocks:
        if block['type'] != 'paragraph':
            continue
        block['lines'] = [line.replace('``', '"')
                          for line in block['lines']]
    return blocks
187 187
188 188
def addmargins(blocks):
    """Adds empty blocks for vertical spacing.

    Consecutive bullet, option, field, or definition blocks of the
    same type are kept together with no vertical space between them.
    Between every other pair of neighbouring blocks an empty block of
    type 'margin' is inserted. The list is modified in place and
    returned.
    """
    grouped = ('bullet', 'option', 'field', 'definition')
    pos = 1
    while pos < len(blocks):
        kind = blocks[pos]['type']
        if kind in grouped and kind == blocks[pos - 1]['type']:
            pos += 1
            continue
        blocks.insert(pos, dict(lines=[''], indent=0, type='margin'))
        # Step over both the new margin and the block it precedes.
        pos += 2
    return blocks
204 204
205 205
def formatblock(block, width):
    """Format a block according to width.

    block is a dictionary with 'type', 'indent' and 'lines' fields
    and width is the maximum line length; a width of zero or less
    selects 78. The block is not modified. Returns the formatted
    text as a string without a trailing newline:

    - 'margin' blocks give the empty string

    - 'literal' and 'section' blocks are indented but never wrapped

    - 'definition' blocks keep the term on its own line and wrap the
      rest at the indentation of the last line

    - all other blocks are re-wrapped; bullet, field and option items
      get a hanging indent so that continuation lines line up with
      the text after the item marker
    """
    if width <= 0:
        width = 78
    indent = ' ' * block['indent']
    blocktype = block['type']
    lines = block['lines']
    if blocktype == 'margin':
        return ''
    if blocktype == 'literal':
        indent += ' '
        return indent + ('\n' + indent).join(lines)
    if blocktype == 'section':
        return indent + ('\n' + indent).join(lines)
    if blocktype == 'definition':
        term = indent + lines[0]
        hang = len(lines[-1]) - len(lines[-1].lstrip())
        defindent = indent + hang * ' '
        text = ' '.join([l.strip() for l in lines[1:]])
        return "%s\n%s" % (term, textwrap.fill(text, width=width,
                                               initial_indent=defindent,
                                               subsequent_indent=defindent))
    initindent = subindent = indent
    if blocktype == 'bullet':
        m = _bulletre.match(lines[0])
        if m:
            subindent = indent + m.end() * ' '
    elif blocktype == 'field':
        m = _fieldre.match(lines[0])
        if m:
            key, spaces, rest = m.groups()
            # Turn ":foo: bar" into "foo   bar". The two colons are
            # replaced by two spaces so that the text keeps its column
            # and matches the hanging indent computed below. Work on
            # a copy so that the caller's block is left untouched and
            # formatting the same block twice gives the same result.
            lines = ['%s  %s%s' % (key, spaces, rest)] + lines[1:]
            subindent = indent + (2 + len(key) + len(spaces)) * ' '
    elif blocktype == 'option':
        m = _optionre.match(lines[0])
        if m:
            option, arg, rest = m.groups()
            subindent = indent + (len(option) + len(arg)) * ' '

    text = ' '.join([l.strip() for l in lines])
    return textwrap.fill(text, width=width,
                         initial_indent=initindent,
                         subsequent_indent=subindent)
248 248
249 249
def format(text, width, indent=0):
    """Parse and format the text according to width.

    The text is split into blocks, every block is shifted to the
    right by indent columns, and the blocks are then run through the
    parsing passes in order before each one is formatted with
    formatblock. Returns the formatted blocks joined by newlines.
    """
    blocks = findblocks(text)
    for block in blocks:
        block['indent'] += indent
    # Order matters: literal blocks must be identified before inline
    # markup and lists are processed.
    passes = (findliteralblocks, inlineliterals, splitparagraphs,
              findsections, addmargins)
    for step in passes:
        blocks = step(blocks)
    return '\n'.join([formatblock(block, width) for block in blocks])
261 261
262 262
if __name__ == "__main__":
    # Debug mode: format the file named on the command line to a width
    # of 30 columns, dumping the block list after every parsing pass.
    from pprint import pprint

    def debug(func, blocks):
        """Apply func to blocks, dump the result, and return it."""
        blocks = func(blocks)
        # Single-argument parenthesised form: prints the same text
        # whether print is a statement or a function.
        print("*** after %s:" % func.__name__)
        pprint(blocks)
        print("")
        return blocks

    fp = open(sys.argv[1])
    try:
        text = fp.read()
    finally:
        # Do not leave the file handle open.
        fp.close()

    # Same passes, in the same order, as format() uses.
    blocks = text
    for step in (findblocks, findliteralblocks, inlineliterals,
                 splitparagraphs, findsections, addmargins):
        blocks = debug(step, blocks)
    print('\n'.join([formatblock(b, 30) for b in blocks]))
General Comments 0
You need to be logged in to leave comments. Login now