upstream/kallithea Commit - r4009:7563624e

1

# -*- coding: utf-8 -*-

1

# -*- coding: utf-8 -*-

2

"""

2

"""

3

rhodecode.lib.markup_renderer

3

rhodecode.lib.markup_renderer

4

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

4

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

5

6

7

Renderer for markup languages with ability to parse using rst or markdown

7

Renderer for markup languages with ability to parse using rst or markdown

8

9

:created_on: Oct 27, 2011

9

:created_on: Oct 27, 2011

10

:author: marcink

10

:author: marcink

11

12

:license: GPLv3, see COPYING for more details.

12

:license: GPLv3, see COPYING for more details.

13

"""

13

"""

14

# This program is free software: you can redistribute it and/or modify

14

# This program is free software: you can redistribute it and/or modify

15

# it under the terms of the GNU General Public License as published by

15

# it under the terms of the GNU General Public License as published by

16

# the Free Software Foundation, either version 3 of the License, or

16

# the Free Software Foundation, either version 3 of the License, or

17

# (at your option) any later version.

17

# (at your option) any later version.

18

#

18

#

19

# This program is distributed in the hope that it will be useful,

19

# This program is distributed in the hope that it will be useful,

20

# but WITHOUT ANY WARRANTY; without even the implied warranty of

20

# but WITHOUT ANY WARRANTY; without even the implied warranty of

21

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

21

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

22

# GNU General Public License for more details.

22

# GNU General Public License for more details.

23

#

23

#

24

# You should have received a copy of the GNU General Public License

24

# You should have received a copy of the GNU General Public License

25

# along with this program. If not, see <http://www.gnu.org/licenses/>.

25

# along with this program. If not, see <http://www.gnu.org/licenses/>.

26

27

import re

27

import re

28

import logging

28

import logging

29

import traceback

29

import traceback

30

31

from rhodecode.lib.utils2 import safe_unicode, MENTIONS_REGEX

31

from rhodecode.lib.utils2 import safe_unicode, MENTIONS_REGEX

32

33

log = logging.getLogger(__name__)

33

log = logging.getLogger(__name__)

34

35

36

class MarkupRenderer(object):

36

class MarkupRenderer(object):

37

RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']

37

RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']

38

39

MARKDOWN_PAT = re.compile(r'md|mkdn?|mdown|markdown', re.IGNORECASE)

39

MARKDOWN_PAT = re.compile(r'md|mkdn?|mdown|markdown', re.IGNORECASE)

40

RST_PAT = re.compile(r're?st', re.IGNORECASE)

40

RST_PAT = re.compile(r're?st', re.IGNORECASE)

41

PLAIN_PAT = re.compile(r'readme', re.IGNORECASE)

41

PLAIN_PAT = re.compile(r'readme', re.IGNORECASE)

42

43

def __detect_renderer(self, source, filename=None):

43

def _detect_renderer(self, source, filename=None):

44

"""

44

"""

45

runs detection of what renderer should be used for generating html

45

runs detection of what renderer should be used for generating html

46

from a markup language

46

from a markup language

47

48

filename can be also explicitly a renderer name

48

filename can be also explicitly a renderer name

49

50

:param source:

50

:param source:

51

:param filename:

51

:param filename:

52

"""

52

"""

53

54

if MarkupRenderer.MARKDOWN_PAT.findall(filename):

54

if MarkupRenderer.MARKDOWN_PAT.findall(filename):

55

detected_renderer = 'markdown'

55

detected_renderer = 'markdown'

56

elif MarkupRenderer.RST_PAT.findall(filename):

56

elif MarkupRenderer.RST_PAT.findall(filename):

57

detected_renderer = 'rst'

57

detected_renderer = 'rst'

58

elif MarkupRenderer.PLAIN_PAT.findall(filename):

58

elif MarkupRenderer.PLAIN_PAT.findall(filename):

59

detected_renderer = 'rst'

59

detected_renderer = 'rst'

60

else:

60

else:

61

detected_renderer = 'plain'

61

detected_renderer = 'plain'

62

63

return getattr(MarkupRenderer, detected_renderer)

63

return getattr(MarkupRenderer, detected_renderer)

64

65

@classmethod

66

def _flavored_markdown(cls, text):

67

"""

68

Github style flavored markdown

69

70

:param text:

71

"""

72

from hashlib import md5

73

74

# Extract pre blocks.

75

extractions = {}

76

def pre_extraction_callback(matchobj):

77

digest = md5(matchobj.group(0)).hexdigest()

78

extractions[digest] = matchobj.group(0)

79

return "{gfm-extraction-%s}" % digest

80

pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)

81

text = re.sub(pattern, pre_extraction_callback, text)

82

83

# Prevent foo_bar_baz from ending up with an italic word in the middle.

84

def italic_callback(matchobj):

85

s = matchobj.group(0)

86

if list(s).count('_') >= 2:

87

return s.replace('_', '\_')

88

return s

89

text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)

90

91

# In very clear cases, let newlines become <br /> tags.

92

def newline_callback(matchobj):

93

if len(matchobj.group(1)) == 1:

94

return matchobj.group(0).rstrip() + ' \n'

95

else:

96

return matchobj.group(0)

97

pattern = re.compile(r'^[\w\<][^\n]*(\n+)', re.MULTILINE)

98

text = re.sub(pattern, newline_callback, text)

99

100

# Insert pre block extractions.

101

def pre_insert_callback(matchobj):

102

return '\n\n' + extractions[matchobj.group(1)]

103

text = re.sub(r'{gfm-extraction-([0-9a-f]{32})\}',

104

pre_insert_callback, text)

105

106

return text

107

65

def render(self, source, filename=None):

108

def render(self, source, filename=None):

66

"""

109

"""

67

Renders a given filename using detected renderer

110

Renders a given filename using detected renderer

68

it detects renderers based on file extension or mimetype.

111

it detects renderers based on file extension or mimetype.

69

At last it will just do a simple html replacing new lines with <br/>

112

At last it will just do a simple html replacing new lines with <br/>

70

113

71

:param file_name:

114

:param file_name:

72

:param source:

115

:param source:

73

"""

116

"""

74

117

75

renderer = self.__detect_renderer(source, filename)

118

renderer = self._detect_renderer(source, filename)

76

readme_data = renderer(source)

119

readme_data = renderer(source)

77

return readme_data

120

return readme_data

78

121

79

@classmethod

122

@classmethod

80

def plain(cls, source):

123

def plain(cls, source):

81

source = safe_unicode(source)

124

source = safe_unicode(source)

82

125

83

def urlify_text(text):

126

def urlify_text(text):

84

url_pat = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'

127

url_pat = re.compile(r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'

85

'|[!*,]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')

128

'|[!*,]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)')

86

129

87

def url_func(match_obj):

130

def url_func(match_obj):

88

url_full = match_obj.groups()[0]

131

url_full = match_obj.groups()[0]

89

return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})

132

return '<a href="%(url)s">%(url)s</a>' % ({'url': url_full})

90

133

91

return url_pat.sub(url_func, text)

134

return url_pat.sub(url_func, text)

92

135

93

source = urlify_text(source)

136

source = urlify_text(source)

94

return ' ' + source.replace("\n", ' ')

137

return ' ' + source.replace("\n", ' ')

95

138

96

@classmethod

139

@classmethod

97

def markdown(cls, source, safe=True):

140

def markdown(cls, source, safe=True, flavored=False):

98

source = safe_unicode(source)

141

source = safe_unicode(source)

99

try:

142

try:

100

import markdown as __markdown

143

import markdown as __markdown

144

if flavored:

145

source = cls._flavored_markdown(source)

101

return __markdown.markdown(source, ['codehilite', 'extra'])

146

return __markdown.markdown(source, ['codehilite', 'extra'])

102

except ImportError:

147

except ImportError:

103

log.warning('Install markdown to use this function')

148

log.warning('Install markdown to use this function')

104

return cls.plain(source)

149

return cls.plain(source)

105

except Exception:

150

except Exception:

106

log.error(traceback.format_exc())

151

log.error(traceback.format_exc())

107

if safe:

152

if safe:

108

return source

153

return source

109

else:

154

else:

110

raise

155

raise

111

156

112

@classmethod

157

@classmethod

113

def rst(cls, source, safe=True):

158

def rst(cls, source, safe=True):

114

source = safe_unicode(source)

159

source = safe_unicode(source)

115

try:

160

try:

116

from docutils.core import publish_parts

161

from docutils.core import publish_parts

117

from docutils.parsers.rst import directives

162

from docutils.parsers.rst import directives

118

docutils_settings = dict([(alias, None) for alias in

163

docutils_settings = dict([(alias, None) for alias in

119

cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES])

164

cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES])

120

165

121

docutils_settings.update({'input_encoding': 'unicode',

166

docutils_settings.update({'input_encoding': 'unicode',

122

'report_level': 4})

167

'report_level': 4})

123

168

124

for k, v in docutils_settings.iteritems():

169

for k, v in docutils_settings.iteritems():

125

directives.register_directive(k, v)

170

directives.register_directive(k, v)

126

171

127

parts = publish_parts(source=source,

172

parts = publish_parts(source=source,

128

writer_name="html4css1",

173

writer_name="html4css1",

129

settings_overrides=docutils_settings)

174

settings_overrides=docutils_settings)

130

175

131

return parts['html_title'] + parts["fragment"]

176

return parts['html_title'] + parts["fragment"]

132

except ImportError:

177

except ImportError:

133

log.warning('Install docutils to use this function')

178

log.warning('Install docutils to use this function')

134

return cls.plain(source)

179

return cls.plain(source)

135

except Exception:

180

except Exception:

136

log.error(traceback.format_exc())

181

log.error(traceback.format_exc())

137

if safe:

182

if safe:

138

return source

183

return source

139

else:

184

else:

140

raise

185

raise

141

186

142

@classmethod

187

@classmethod

143

def rst_with_mentions(cls, source):

188

def rst_with_mentions(cls, source):

144

mention_pat = re.compile(MENTIONS_REGEX)

189

mention_pat = re.compile(MENTIONS_REGEX)

145

190

146

def wrapp(match_obj):

191

def wrapp(match_obj):

147

uname = match_obj.groups()[0]

192

uname = match_obj.groups()[0]

148

return ' **@%(uname)s** ' % {'uname': uname}

193

return ' **@%(uname)s** ' % {'uname': uname}

149

mention_hl = mention_pat.sub(wrapp, source).strip()

194

mention_hl = mention_pat.sub(wrapp, source).strip()

150

return cls.rst(mention_hl)

195

return cls.rst(mention_hl)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # -*- coding: utf-8 -*-
             """
                 rhodecode.lib.markup_renderer
                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
                 Renderer for markup languages with ability to parse using rst or markdown
                 :created_on: Oct 27, 2011
                 :author: marcink
                 :copyright: (C) 2011-2012 Marcin Kuzminski <marcin@python-works.com>
                 :license: GPLv3, see COPYING for more details.
             """
             # This program is free software: you can redistribute it and/or modify
             # it under the terms of the GNU General Public License as published by
             # the Free Software Foundation, either version 3 of the License, or
             # (at your option) any later version.
             #
             # This program is distributed in the hope that it will be useful,
             # but WITHOUT ANY WARRANTY; without even the implied warranty of
             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
             # GNU General Public License for more details.
             #
             # You should have received a copy of the GNU General Public License
             # along with this program.  If not, see <http://www.gnu.org/licenses/>.
             import re
             import logging
             import traceback
             from rhodecode.lib.utils2 import safe_unicode, MENTIONS_REGEX
             log = logging.getLogger(__name__)
             class MarkupRenderer(object):
                 RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES = ['include', 'meta', 'raw']
                 MARKDOWN_PAT = re.compile(r'md|mkdn?|mdown|markdown', re.IGNORECASE)
                 RST_PAT = re.compile(r're?st', re.IGNORECASE)
                 PLAIN_PAT = re.compile(r'readme', re.IGNORECASE)
-                def __detect_renderer(self, source, filename=None):
+                def _detect_renderer(self, source, filename=None):
                     """
                     runs detection of what renderer should be used for generating html
                     from a markup language
                     filename can be also explicitly a renderer name
                     :param source:
                     :param filename:
                     """
                     if MarkupRenderer.MARKDOWN_PAT.findall(filename):
                         detected_renderer = 'markdown'
                     elif MarkupRenderer.RST_PAT.findall(filename):
                         detected_renderer = 'rst'
                     elif MarkupRenderer.PLAIN_PAT.findall(filename):
                         detected_renderer = 'rst'
                     else:
                         detected_renderer = 'plain'
                     return getattr(MarkupRenderer, detected_renderer)
+                @classmethod
+                def _flavored_markdown(cls, text):
+                    """
+                    Github style flavored markdown
+                    :param text:
+                    """
+                    from hashlib import md5
+                    # Extract pre blocks.
+                    extractions = {}
+                    def pre_extraction_callback(matchobj):
+                        digest = md5(matchobj.group(0)).hexdigest()
+                        extractions[digest] = matchobj.group(0)
+                        return "{gfm-extraction-%s}" % digest
+                    pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
+                    text = re.sub(pattern, pre_extraction_callback, text)
+                    # Prevent foo_bar_baz from ending up with an italic word in the middle.
+                    def italic_callback(matchobj):
+                        s = matchobj.group(0)
+                        if list(s).count('_') >= 2:
+                            return s.replace('_', '\_')
+                        return s
+                    text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)
+                    # In very clear cases, let newlines become <br /> tags.
+                    def newline_callback(matchobj):
+                        if len(matchobj.group(1)) == 1:
+                            return matchobj.group(0).rstrip() + '  \n'
+                        else:
+                            return matchobj.group(0)
+                    pattern = re.compile(r'^[\w\<][^\n]*(\n+)', re.MULTILINE)
+                    text = re.sub(pattern, newline_callback, text)
+                    # Insert pre block extractions.
+                    def pre_insert_callback(matchobj):
+                        return '\n\n' + extractions[matchobj.group(1)]
+                    text = re.sub(r'{gfm-extraction-([0-9a-f]{32})\}',
+                                  pre_insert_callback, text)
+                    return text
                 def render(self, source, filename=None):
                     """
                     Renders a given filename using detected renderer
                     it detects renderers based on file extension or mimetype.
                     At last it will just do a simple html replacing new lines with <br/>
                     :param file_name:
                     :param source:
                     """
-                    renderer = self.__detect_renderer(source, filename)
+                    renderer = self._detect_renderer(source, filename)
                     readme_data = renderer(source)
                     return readme_data
                 @classmethod
                 def plain(cls, source):
                     """
                     Render ``source`` as minimal html: every http(s) url is wrapped
                     in an anchor tag and each newline is turned into ``<br />``,
                     with one extra ``<br />`` prepended to the result.
                     :param source: text to render, passed through safe_unicode first
                     """
                     link_re = re.compile(
                         r'(http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]'
                         r'|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+)'
                     )
                     def to_anchor(found):
                         # group 1 spans the whole matched url
                         target = found.group(1)
                         return '<a href="%(url)s">%(url)s</a>' % ({'url': target})
                     linked = link_re.sub(to_anchor, safe_unicode(source))
                     # joining the newline-split pieces is the same as replacing
                     # every "\n" with the tag
                     return '<br />' + '<br />'.join(linked.split("\n"))
                 @classmethod
-                def markdown(cls, source, safe=True):
+                def markdown(cls, source, safe=True, flavored=False):
                     source = safe_unicode(source)
                     try:
                         import markdown as __markdown
+                        if flavored:
+                            source = cls._flavored_markdown(source)
                         return __markdown.markdown(source, ['codehilite', 'extra'])
                     except ImportError:
                         log.warning('Install markdown to use this function')
                         return cls.plain(source)
                     except Exception:
                         log.error(traceback.format_exc())
                         if safe:
                             return source
                         else:
                             raise
                 @classmethod
                 def rst(cls, source, safe=True):
                     """
                     Render reStructuredText ``source`` to html using docutils and
                     return the ``html_title`` part followed by the ``fragment`` part.
                     Every name in RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES is registered
                     with docutils as ``None`` before publishing.
                     Falls back to :meth:`plain` when docutils cannot be imported.
                     :param source: rst text, passed through safe_unicode first
                     :param safe: when True a rendering error is logged and the
                         unrendered source is returned, when False it is re-raised
                     """
                     source = safe_unicode(source)
                     try:
                         from docutils.core import publish_parts
                         from docutils.parsers.rst import directives
                         disallowed = cls.RESTRUCTUREDTEXT_DISALLOWED_DIRECTIVES
                         # Only directive names go into the directive registry.
                         # Previously the publisher settings below were registered
                         # too, making 'unicode' and 4 the "implementation" of
                         # directives named input_encoding and report_level.
                         # NOTE(review): registering None presumably makes docutils
                         # treat the directive as unavailable - confirm.
                         for alias in disallowed:
                             directives.register_directive(alias, None)
                         # settings_overrides is passed exactly as before: the
                         # disallowed names mapped to None plus the real settings
                         docutils_settings = dict((alias, None) for alias in disallowed)
                         docutils_settings.update({'input_encoding': 'unicode',
                                                   'report_level': 4})
                         parts = publish_parts(source=source,
                                               writer_name="html4css1",
                                               settings_overrides=docutils_settings)
                         return parts['html_title'] + parts["fragment"]
                     except ImportError:
                         log.warning('Install docutils to use this function')
                         return cls.plain(source)
                     except Exception:
                         log.error(traceback.format_exc())
                         if safe:
                             return source
                         else:
                             raise
                 @classmethod
                 def rst_with_mentions(cls, source):
                     """
                     Render ``source`` through :meth:`rst` after wrapping every
                     MENTIONS_REGEX match in bold ``**@name**`` markup.
                     :param source: rst text that may contain mentions
                     """
                     def emphasize(found):
                         # group 1 of the pattern is presumably the user name
                         return ' **@%(uname)s** ' % {'uname': found.group(1)}
                     highlighted = re.compile(MENTIONS_REGEX).sub(emphasize, source)
                     # drop the padding spaces left at either end of the text
                     return cls.rst(highlighted.strip())