##// END OF EJS Templates
Moved text parser and preparser to a separate module (BB-64)
neko259 -
r1066:dc65b709 default
parent child Browse files
Show More
@@ -2,6 +2,9 b''
2 2
3 3 import re
4 4 import bbcode
5
6 from urllib.parse import unquote
7
5 8 from django.core.exceptions import ObjectDoesNotExist
6 9 from django.core.urlresolvers import reverse
7 10
@@ -14,6 +17,7 b' import boards'
14 17 REFLINK_PATTERN = re.compile(r'^\d+$')
15 18 MULTI_NEWLINES_PATTERN = re.compile(r'(\r?\n){2,}')
16 19 ONE_NEWLINE = '\n'
20 REGEX_URL = re.compile(r'https?\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(/\S*)?')
17 21
18 22
19 23 class TextFormatter():
@@ -169,36 +173,6 b' def render_notification(tag_name, value,'
169 173 reverse('notifications', kwargs={'username': username}), username)
170 174
171 175
172 def preparse_text(text):
173 """
174 Performs manual parsing before the bbcode parser is used.
175 """
176
177 return MULTI_NEWLINES_PATTERN.sub(ONE_NEWLINE, text)
178
179
180 def bbcode_extended(markup):
181 # The newline hack is added because br's margin does not work in all
182 # browsers except firefox, when the div's does.
183 parser = bbcode.Parser(newline='<div class="br"></div>')
184 parser.add_formatter('post', render_reflink, strip=True)
185 parser.add_formatter('thread', render_multithread, strip=True)
186 parser.add_formatter('quote', render_quote, strip=True)
187 parser.add_formatter('user', render_notification, strip=True)
188 parser.add_simple_formatter('comment',
189 '<span class="comment">//%(value)s</span>')
190 parser.add_simple_formatter('spoiler',
191 '<span class="spoiler">%(value)s</span>')
192 parser.add_simple_formatter('s',
193 '<span class="strikethrough">%(value)s</span>')
194 # TODO Why not use built-in tag?
195 parser.add_simple_formatter('code',
196 '<pre><code>%(value)s</pre></code>',
197 render_embedded=False)
198
199 text = preparse_text(markup)
200 return parser.format(text)
201
202 176 formatters = [
203 177 QuotePattern,
204 178 SpoilerPattern,
@@ -208,3 +182,52 b' formatters = ['
208 182 StrikeThroughPattern,
209 183 CodePattern,
210 184 ]
185
186
187 PREPARSE_PATTERNS = {
188 r'>>>(\d+)': r'[thread]\1[/thread]', # Multi-thread post ">>>123"
189 r'(?<!>)>>(\d+)': r'[post]\1[/post]', # Reflink ">>123"
190 r'^>([^>].+)': r'[quote]\1[/quote]', # Quote ">text"
191 r'^//(.+)': r'[comment]\1[/comment]', # Comment "//text"
192 r'\B@(\w+)': r'[user]\1[/user]', # User notification "@user"
193 }
194
195
196 class Parser:
197 def __init__(self):
198 # The newline hack is added because br's margin does not work in all
199 # browsers except firefox, when the div's does.
200 self.parser = bbcode.Parser(newline='<div class="br"></div>')
201
202 self.parser.add_formatter('post', render_reflink, strip=True)
203 self.parser.add_formatter('thread', render_multithread, strip=True)
204 self.parser.add_formatter('quote', render_quote, strip=True)
205 self.parser.add_formatter('user', render_notification, strip=True)
206 self.parser.add_simple_formatter(
207 'comment', '<span class="comment">//%(value)s</span>')
208 self.parser.add_simple_formatter(
209 'spoiler', '<span class="spoiler">%(value)s</span>')
210 self.parser.add_simple_formatter(
211 's', '<span class="strikethrough">%(value)s</span>')
212 # TODO Why not use built-in tag?
213 self.parser.add_simple_formatter('code',
214 '<pre><code>%(value)s</pre></code>',
215 render_embedded=False)
216
217 def preparse(self, text):
218 """
219 Performs manual parsing before the bbcode parser is used.
220 Preparsed text is saved as raw and the text before preparsing is lost.
221 """
222 new_text = MULTI_NEWLINES_PATTERN.sub(ONE_NEWLINE, text)
223
224 for key, value in PREPARSE_PATTERNS.items():
225 new_text = re.sub(key, value, new_text, flags=re.MULTILINE)
226
227 for link in REGEX_URL.findall(text):
228 new_text = new_text.replace(link, unquote(link))
229
230 return new_text
231
232 def parse(self, text):
233 return self.parser.format(text) No newline at end of file
@@ -3,8 +3,6 b' from datetime import time as dtime'
3 3 import logging
4 4 import re
5 5
6 from urllib.parse import unquote
7
8 6 from adjacent import Client
9 7 from django.core.exceptions import ObjectDoesNotExist
10 8 from django.core.urlresolvers import reverse
@@ -14,7 +12,7 b' from django.template.loader import rende'
14 12 from django.utils import timezone
15 13
16 14 from boards import settings
17 from boards.mdx_neboard import bbcode_extended
15 from boards.mdx_neboard import Parser
18 16 from boards.models import PostImage
19 17 from boards.models.base import Viewable
20 18 from boards.utils import datetime_to_epoch, cached_result
@@ -45,7 +43,6 b" UNKNOWN_UA = ''"
45 43
46 44 REGEX_REPLY = re.compile(r'\[post\](\d+)\[/post\]')
47 45 REGEX_MULTI_THREAD = re.compile(r'\[thread\](\d+)\[/thread\]')
48 REGEX_URL = re.compile(r'https?\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(/\S*)?')
49 46 REGEX_NOTIFICATION = re.compile(r'\[user\](\w+)\[/user\]')
50 47
51 48 PARAMETER_TRUNCATED = 'truncated'
@@ -64,14 +61,6 b" PARAMETER_REPLY_LINK = 'reply_link'"
64 61 DIFF_TYPE_HTML = 'html'
65 62 DIFF_TYPE_JSON = 'json'
66 63
67 PREPARSE_PATTERNS = {
68 r'>>>(\d+)': r'[thread]\1[/thread]', # Multi-thread post ">>>123"
69 r'(?<!>)>>(\d+)': r'[post]\1[/post]', # Reflink ">>123"
70 r'^>([^>].+)': r'[quote]\1[/quote]', # Quote ">text"
71 r'^//(.+)': r'[comment]\1[/comment]', # Comment "//text"
72 r'\B@(\w+)': r'[user]\1[/user]', # User notification "@user"
73 }
74
75 64
76 65 class PostManager(models.Manager):
77 66 @transaction.atomic
@@ -92,7 +81,7 b' class PostManager(models.Manager):'
92 81 else:
93 82 new_thread = False
94 83
95 pre_text = self._preparse_text(text)
84 pre_text = Parser().preparse(text)
96 85
97 86 post = self.create(title=title,
98 87 text=pre_text,
@@ -158,21 +147,6 b' class PostManager(models.Manager):'
158 147
159 148 return ppd
160 149
161 # TODO Make a separate parser module and move preparser there
162 def _preparse_text(self, text: str) -> str:
163 """
164 Preparses text to change patterns like '>>' to a proper bbcode
165 tags.
166 """
167
168 for key, value in PREPARSE_PATTERNS.items():
169 text = re.sub(key, value, text, flags=re.MULTILINE)
170
171 for link in REGEX_URL.findall(text):
172 text = text.replace(link, unquote(link))
173
174 return text
175
176 150
177 151 class Post(models.Model, Viewable):
178 152 """A post is a message."""
@@ -398,7 +372,7 b' class Post(models.Model, Viewable):'
398 372
399 373 def save(self, force_insert=False, force_update=False, using=None,
400 374 update_fields=None):
401 self._text_rendered = bbcode_extended(self.get_raw_text())
375 self._text_rendered = Parser().parse(self.get_raw_text())
402 376
403 377 super().save(force_insert, force_update, using, update_fields)
404 378
@@ -136,7 +136,6 b''
136 136 <div class="form-submit">
137 137 <input type="submit" value="{% trans "Post" %}"/>
138 138 </div>
139 (ctrl-enter)
140 139 </form>
141 140 </div>
142 141 <div>
@@ -1,11 +1,12 b''
1 1 from django.test import TestCase
2 from boards.mdx_neboard import Parser
2 3 from boards.models import Post
3 4
4 5
5 6 class ParserTest(TestCase):
6 7 def test_preparse_quote(self):
7 8 raw_text = '>quote\nQuote in >line\nLine\n>Quote'
8 preparsed_text = Post.objects._preparse_text(raw_text)
9 preparsed_text = Parser().preparse(raw_text)
9 10
10 11 self.assertEqual(
11 12 '[quote]quote[/quote]\nQuote in >line\nLine\n[quote]Quote[/quote]',
@@ -13,21 +14,21 b' class ParserTest(TestCase):'
13 14
14 15 def test_preparse_comment(self):
15 16 raw_text = '//comment'
16 preparsed_text = Post.objects._preparse_text(raw_text)
17 preparsed_text = Parser().preparse(raw_text)
17 18
18 19 self.assertEqual('[comment]comment[/comment]', preparsed_text,
19 20 'Comment not preparsed.')
20 21
21 22 def test_preparse_reflink(self):
22 23 raw_text = '>>12\nText'
23 preparsed_text = Post.objects._preparse_text(raw_text)
24 preparsed_text = Parser().preparse(raw_text)
24 25
25 26 self.assertEqual('[post]12[/post]\nText',
26 27 preparsed_text, 'Reflink not preparsed.')
27 28
28 29 def preparse_user(self):
29 30 raw_text = '@user\nuser@example.com\n@user\nuser @user'
30 preparsed_text = Post.objects._preparse_text(raw_text)
31 preparsed_text = Parser().preparse(raw_text)
31 32
32 33 self.assertEqual('[user]user[/user]\nuser@example.com\n[user]user[/user]\nuser [user]user[/user]',
33 34 preparsed_text, 'User link not preparsed.')
@@ -2,7 +2,8 b' from django.shortcuts import render'
2 2 from django.template import RequestContext
3 3 from django.views.generic import View
4 4
5 from boards.mdx_neboard import bbcode_extended
5 from boards.mdx_neboard import Parser
6
6 7
7 8 FORM_QUERY = 'query'
8 9
@@ -28,7 +29,8 b' class PostPreviewView(View):'
28 29 raw_text = request.POST[FORM_QUERY]
29 30
30 31 if len(raw_text) >= 0:
31 rendered_text = bbcode_extended(raw_text)
32 parser = Parser()
33 rendered_text = parser.parse(parser.preparse(raw_text))
32 34
33 35 context[CONTEXT_RESULT] = rendered_text
34 36 context[CONTEXT_QUERY] = raw_text
@@ -1,6 +1,5 b''
1 1 # Django settings for neboard project.
2 2 import os
3 from boards.mdx_neboard import bbcode_extended
4 3
5 4 DEBUG = True
6 5 TEMPLATE_DEBUG = DEBUG
General Comments 0
You need to be logged in to leave comments. Login now