##// END OF EJS Templates
Moved text parser and preparser to a separate module (BB-64)
neko259 -
r1066:dc65b709 default
parent child Browse files
Show More
@@ -2,6 +2,9 b''
2
2
3 import re
3 import re
4 import bbcode
4 import bbcode
5
6 from urllib.parse import unquote
7
5 from django.core.exceptions import ObjectDoesNotExist
8 from django.core.exceptions import ObjectDoesNotExist
6 from django.core.urlresolvers import reverse
9 from django.core.urlresolvers import reverse
7
10
@@ -14,6 +17,7 b' import boards'
14 REFLINK_PATTERN = re.compile(r'^\d+$')
17 REFLINK_PATTERN = re.compile(r'^\d+$')
15 MULTI_NEWLINES_PATTERN = re.compile(r'(\r?\n){2,}')
18 MULTI_NEWLINES_PATTERN = re.compile(r'(\r?\n){2,}')
16 ONE_NEWLINE = '\n'
19 ONE_NEWLINE = '\n'
20 REGEX_URL = re.compile(r'https?\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(/\S*)?')
17
21
18
22
19 class TextFormatter():
23 class TextFormatter():
@@ -169,36 +173,6 b' def render_notification(tag_name, value,'
169 reverse('notifications', kwargs={'username': username}), username)
173 reverse('notifications', kwargs={'username': username}), username)
170
174
171
175
172 def preparse_text(text):
173 """
174 Performs manual parsing before the bbcode parser is used.
175 """
176
177 return MULTI_NEWLINES_PATTERN.sub(ONE_NEWLINE, text)
178
179
180 def bbcode_extended(markup):
181 # The newline hack is added because br's margin does not work in all
182 # browsers except firefox, when the div's does.
183 parser = bbcode.Parser(newline='<div class="br"></div>')
184 parser.add_formatter('post', render_reflink, strip=True)
185 parser.add_formatter('thread', render_multithread, strip=True)
186 parser.add_formatter('quote', render_quote, strip=True)
187 parser.add_formatter('user', render_notification, strip=True)
188 parser.add_simple_formatter('comment',
189 '<span class="comment">//%(value)s</span>')
190 parser.add_simple_formatter('spoiler',
191 '<span class="spoiler">%(value)s</span>')
192 parser.add_simple_formatter('s',
193 '<span class="strikethrough">%(value)s</span>')
194 # TODO Why not use built-in tag?
195 parser.add_simple_formatter('code',
196 '<pre><code>%(value)s</pre></code>',
197 render_embedded=False)
198
199 text = preparse_text(markup)
200 return parser.format(text)
201
202 formatters = [
176 formatters = [
203 QuotePattern,
177 QuotePattern,
204 SpoilerPattern,
178 SpoilerPattern,
@@ -208,3 +182,52 b' formatters = ['
208 StrikeThroughPattern,
182 StrikeThroughPattern,
209 CodePattern,
183 CodePattern,
210 ]
184 ]
185
186
# Shorthand markup -> bbcode rewrite rules, applied by Parser.preparse() with
# re.MULTILINE so that '^' anchors at the start of every line.
PREPARSE_PATTERNS = {
    r'>>>(\d+)': r'[thread]\1[/thread]',  # Multi-thread post ">>>123"
    r'(?<!>)>>(\d+)': r'[post]\1[/post]',  # Reflink ">>123"
    r'^>([^>].+)': r'[quote]\1[/quote]',  # Quote ">text"
    r'^//(.+)': r'[comment]\1[/comment]',  # Comment "//text"
    r'\B@(\w+)': r'[user]\1[/user]',  # User notification "@user"
}


class Parser:
    """
    Text parser of the board.

    Wraps a bbcode parser configured with the board-specific tags and
    provides the preparsing step that rewrites shorthand markup (">>123",
    ">quote", "//comment", "@user") into bbcode tags.
    """

    def __init__(self):
        # The newline hack is added because br's margin does not work in all
        # browsers except firefox, when the div's does.
        self.parser = bbcode.Parser(newline='<div class="br"></div>')

        self.parser.add_formatter('post', render_reflink, strip=True)
        self.parser.add_formatter('thread', render_multithread, strip=True)
        self.parser.add_formatter('quote', render_quote, strip=True)
        self.parser.add_formatter('user', render_notification, strip=True)
        self.parser.add_simple_formatter(
            'comment', '<span class="comment">//%(value)s</span>')
        self.parser.add_simple_formatter(
            'spoiler', '<span class="spoiler">%(value)s</span>')
        self.parser.add_simple_formatter(
            's', '<span class="strikethrough">%(value)s</span>')
        # TODO Why not use built-in tag?
        # The closing tags are emitted in reverse order of the opening ones
        # so that the generated markup is properly nested.
        self.parser.add_simple_formatter(
            'code', '<pre><code>%(value)s</code></pre>',
            render_embedded=False)

    def preparse(self, text):
        """
        Performs manual parsing before the bbcode parser is used.
        Preparsed text is saved as raw and the text before preparsing is lost.

        Collapses runs of newlines into one, applies PREPARSE_PATTERNS and
        percent-decodes every URL matched by REGEX_URL. Returns the
        resulting text.
        """
        new_text = MULTI_NEWLINES_PATTERN.sub(ONE_NEWLINE, text)

        for key, value in PREPARSE_PATTERNS.items():
            new_text = re.sub(key, value, new_text, flags=re.MULTILINE)

        # REGEX_URL has a capturing group, so findall() would return only
        # the path part of each URL (or '' when there is none) and a
        # str.replace() on it would touch every occurrence of that fragment
        # in the post. Substituting with a callback decodes exactly the
        # matched URLs and nothing else.
        return REGEX_URL.sub(lambda match: unquote(match.group(0)), new_text)

    def parse(self, text):
        """
        Renders the given (already preparsed) text with the configured
        bbcode parser and returns the result of its format() call.
        """
        return self.parser.format(text)
@@ -3,8 +3,6 b' from datetime import time as dtime'
3 import logging
3 import logging
4 import re
4 import re
5
5
6 from urllib.parse import unquote
7
8 from adjacent import Client
6 from adjacent import Client
9 from django.core.exceptions import ObjectDoesNotExist
7 from django.core.exceptions import ObjectDoesNotExist
10 from django.core.urlresolvers import reverse
8 from django.core.urlresolvers import reverse
@@ -14,7 +12,7 b' from django.template.loader import rende'
14 from django.utils import timezone
12 from django.utils import timezone
15
13
16 from boards import settings
14 from boards import settings
17 from boards.mdx_neboard import bbcode_extended
15 from boards.mdx_neboard import Parser
18 from boards.models import PostImage
16 from boards.models import PostImage
19 from boards.models.base import Viewable
17 from boards.models.base import Viewable
20 from boards.utils import datetime_to_epoch, cached_result
18 from boards.utils import datetime_to_epoch, cached_result
@@ -45,7 +43,6 b" UNKNOWN_UA = ''"
45
43
46 REGEX_REPLY = re.compile(r'\[post\](\d+)\[/post\]')
44 REGEX_REPLY = re.compile(r'\[post\](\d+)\[/post\]')
47 REGEX_MULTI_THREAD = re.compile(r'\[thread\](\d+)\[/thread\]')
45 REGEX_MULTI_THREAD = re.compile(r'\[thread\](\d+)\[/thread\]')
48 REGEX_URL = re.compile(r'https?\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(/\S*)?')
49 REGEX_NOTIFICATION = re.compile(r'\[user\](\w+)\[/user\]')
46 REGEX_NOTIFICATION = re.compile(r'\[user\](\w+)\[/user\]')
50
47
51 PARAMETER_TRUNCATED = 'truncated'
48 PARAMETER_TRUNCATED = 'truncated'
@@ -64,14 +61,6 b" PARAMETER_REPLY_LINK = 'reply_link'"
64 DIFF_TYPE_HTML = 'html'
61 DIFF_TYPE_HTML = 'html'
65 DIFF_TYPE_JSON = 'json'
62 DIFF_TYPE_JSON = 'json'
66
63
67 PREPARSE_PATTERNS = {
68 r'>>>(\d+)': r'[thread]\1[/thread]', # Multi-thread post ">>>123"
69 r'(?<!>)>>(\d+)': r'[post]\1[/post]', # Reflink ">>123"
70 r'^>([^>].+)': r'[quote]\1[/quote]', # Quote ">text"
71 r'^//(.+)': r'[comment]\1[/comment]', # Comment "//text"
72 r'\B@(\w+)': r'[user]\1[/user]', # User notification "@user"
73 }
74
75
64
76 class PostManager(models.Manager):
65 class PostManager(models.Manager):
77 @transaction.atomic
66 @transaction.atomic
@@ -92,7 +81,7 b' class PostManager(models.Manager):'
92 else:
81 else:
93 new_thread = False
82 new_thread = False
94
83
95 pre_text = self._preparse_text(text)
84 pre_text = Parser().preparse(text)
96
85
97 post = self.create(title=title,
86 post = self.create(title=title,
98 text=pre_text,
87 text=pre_text,
@@ -158,21 +147,6 b' class PostManager(models.Manager):'
158
147
159 return ppd
148 return ppd
160
149
161 # TODO Make a separate parser module and move preparser there
162 def _preparse_text(self, text: str) -> str:
163 """
164 Preparses text to change patterns like '>>' to a proper bbcode
165 tags.
166 """
167
168 for key, value in PREPARSE_PATTERNS.items():
169 text = re.sub(key, value, text, flags=re.MULTILINE)
170
171 for link in REGEX_URL.findall(text):
172 text = text.replace(link, unquote(link))
173
174 return text
175
176
150
177 class Post(models.Model, Viewable):
151 class Post(models.Model, Viewable):
178 """A post is a message."""
152 """A post is a message."""
@@ -398,7 +372,7 b' class Post(models.Model, Viewable):'
398
372
399 def save(self, force_insert=False, force_update=False, using=None,
373 def save(self, force_insert=False, force_update=False, using=None,
400 update_fields=None):
374 update_fields=None):
401 self._text_rendered = bbcode_extended(self.get_raw_text())
375 self._text_rendered = Parser().parse(self.get_raw_text())
402
376
403 super().save(force_insert, force_update, using, update_fields)
377 super().save(force_insert, force_update, using, update_fields)
404
378
@@ -136,7 +136,6 b''
136 <div class="form-submit">
136 <div class="form-submit">
137 <input type="submit" value="{% trans "Post" %}"/>
137 <input type="submit" value="{% trans "Post" %}"/>
138 </div>
138 </div>
139 (ctrl-enter)
140 </form>
139 </form>
141 </div>
140 </div>
142 <div>
141 <div>
@@ -1,11 +1,12 b''
1 from django.test import TestCase
1 from django.test import TestCase
2 from boards.mdx_neboard import Parser
2 from boards.models import Post
3 from boards.models import Post
3
4
4
5
5 class ParserTest(TestCase):
6 class ParserTest(TestCase):
6 def test_preparse_quote(self):
7 def test_preparse_quote(self):
7 raw_text = '>quote\nQuote in >line\nLine\n>Quote'
8 raw_text = '>quote\nQuote in >line\nLine\n>Quote'
8 preparsed_text = Post.objects._preparse_text(raw_text)
9 preparsed_text = Parser().preparse(raw_text)
9
10
10 self.assertEqual(
11 self.assertEqual(
11 '[quote]quote[/quote]\nQuote in >line\nLine\n[quote]Quote[/quote]',
12 '[quote]quote[/quote]\nQuote in >line\nLine\n[quote]Quote[/quote]',
@@ -13,21 +14,21 b' class ParserTest(TestCase):'
13
14
14 def test_preparse_comment(self):
15 def test_preparse_comment(self):
15 raw_text = '//comment'
16 raw_text = '//comment'
16 preparsed_text = Post.objects._preparse_text(raw_text)
17 preparsed_text = Parser().preparse(raw_text)
17
18
18 self.assertEqual('[comment]comment[/comment]', preparsed_text,
19 self.assertEqual('[comment]comment[/comment]', preparsed_text,
19 'Comment not preparsed.')
20 'Comment not preparsed.')
20
21
21 def test_preparse_reflink(self):
22 def test_preparse_reflink(self):
22 raw_text = '>>12\nText'
23 raw_text = '>>12\nText'
23 preparsed_text = Post.objects._preparse_text(raw_text)
24 preparsed_text = Parser().preparse(raw_text)
24
25
25 self.assertEqual('[post]12[/post]\nText',
26 self.assertEqual('[post]12[/post]\nText',
26 preparsed_text, 'Reflink not preparsed.')
27 preparsed_text, 'Reflink not preparsed.')
27
28
28 def preparse_user(self):
29 def preparse_user(self):
29 raw_text = '@user\nuser@example.com\n@user\nuser @user'
30 raw_text = '@user\nuser@example.com\n@user\nuser @user'
30 preparsed_text = Post.objects._preparse_text(raw_text)
31 preparsed_text = Parser().preparse(raw_text)
31
32
32 self.assertEqual('[user]user[/user]\nuser@example.com\n[user]user[/user]\nuser [user]user[/user]',
33 self.assertEqual('[user]user[/user]\nuser@example.com\n[user]user[/user]\nuser [user]user[/user]',
33 preparsed_text, 'User link not preparsed.')
34 preparsed_text, 'User link not preparsed.')
@@ -2,7 +2,8 b' from django.shortcuts import render'
2 from django.template import RequestContext
2 from django.template import RequestContext
3 from django.views.generic import View
3 from django.views.generic import View
4
4
5 from boards.mdx_neboard import bbcode_extended
5 from boards.mdx_neboard import Parser
6
6
7
7 FORM_QUERY = 'query'
8 FORM_QUERY = 'query'
8
9
@@ -28,7 +29,8 b' class PostPreviewView(View):'
28 raw_text = request.POST[FORM_QUERY]
29 raw_text = request.POST[FORM_QUERY]
29
30
30 if len(raw_text) >= 0:
31 if len(raw_text) >= 0:
31 rendered_text = bbcode_extended(raw_text)
32 parser = Parser()
33 rendered_text = parser.parse(parser.preparse(raw_text))
32
34
33 context[CONTEXT_RESULT] = rendered_text
35 context[CONTEXT_RESULT] = rendered_text
34 context[CONTEXT_QUERY] = raw_text
36 context[CONTEXT_QUERY] = raw_text
@@ -1,6 +1,5 b''
1 # Django settings for neboard project.
1 # Django settings for neboard project.
2 import os
2 import os
3 from boards.mdx_neboard import bbcode_extended
4
3
5 DEBUG = True
4 DEBUG = True
6 TEMPLATE_DEBUG = DEBUG
5 TEMPLATE_DEBUG = DEBUG
General Comments 0
You need to be logged in to leave comments. Login now